xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.g16.a16.dim.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10GISEL %s
4; TODO: GlobalISel produces more code than the default selector here; more combines will be needed in the post-regbankselect combine phase.
5; This test depends on other changes, still under separate review, in order to pass.
6
; 1D sample_d with 32-bit derivatives (%dsdh, %dsdv) and a 16-bit coordinate (%s).
; Both the plain llc run and the -global-isel run are expected to emit a single
; image_sample_d carrying the a16 modifier with a three-register address v[0:2],
; and no extra instructions in front of it.
7define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) {
8; GFX10-LABEL: sample_d_1d:
9; GFX10:       ; %bb.0: ; %main_body
10; GFX10-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
11; GFX10-NEXT:    s_waitcnt vmcnt(0)
12; GFX10-NEXT:    ; return to shader part epilog
13;
14; GFX10GISEL-LABEL: sample_d_1d:
15; GFX10GISEL:       ; %bb.0: ; %main_body
16; GFX10GISEL-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
17; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
18; GFX10GISEL-NEXT:    ; return to shader part epilog
19main_body:
20  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
21  ret <4 x float> %v
22}
23
; 2D sample_d: four 32-bit derivatives plus two 16-bit coordinates (%s, %t).
; The six vector arguments are expected to end up in a five-register address
; v[0:4]. The plain llc run merges v5 into v4 with one v_perm_b32; the
; -global-isel run uses v_and_b32 + v_lshl_or_b32 on the same registers.
24define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
25; GFX10-LABEL: sample_d_2d:
26; GFX10:       ; %bb.0: ; %main_body
27; GFX10-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
28; GFX10-NEXT:    image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
29; GFX10-NEXT:    s_waitcnt vmcnt(0)
30; GFX10-NEXT:    ; return to shader part epilog
31;
32; GFX10GISEL-LABEL: sample_d_2d:
33; GFX10GISEL:       ; %bb.0: ; %main_body
34; GFX10GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
35; GFX10GISEL-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
36; GFX10GISEL-NEXT:    image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
37; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
38; GFX10GISEL-NEXT:    ; return to shader part epilog
39main_body:
40  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
41  ret <4 x float> %v
42}
43
; 3D sample_d: six 32-bit derivatives plus three 16-bit coordinates (%s, %t, %r).
; Both runs expect an eight-register address with the a16 modifier. The plain
; llc run copies the operands up into v[8:15] (seven moves plus a v_perm_b32);
; the -global-isel run keeps them in v[0:7] with two moves around its
; v_and_b32 + v_lshl_or_b32 pair.
44define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r) {
45; GFX10-LABEL: sample_d_3d:
46; GFX10:       ; %bb.0: ; %main_body
47; GFX10-NEXT:    v_mov_b32_e32 v15, v8
48; GFX10-NEXT:    v_mov_b32_e32 v13, v5
49; GFX10-NEXT:    v_mov_b32_e32 v12, v4
50; GFX10-NEXT:    v_mov_b32_e32 v11, v3
51; GFX10-NEXT:    v_mov_b32_e32 v10, v2
52; GFX10-NEXT:    v_mov_b32_e32 v9, v1
53; GFX10-NEXT:    v_mov_b32_e32 v8, v0
54; GFX10-NEXT:    v_perm_b32 v14, v7, v6, 0x5040100
55; GFX10-NEXT:    image_sample_d v[0:3], v[8:15], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
56; GFX10-NEXT:    s_waitcnt vmcnt(0)
57; GFX10-NEXT:    ; return to shader part epilog
58;
59; GFX10GISEL-LABEL: sample_d_3d:
60; GFX10GISEL:       ; %bb.0: ; %main_body
61; GFX10GISEL-NEXT:    v_mov_b32_e32 v9, v7
62; GFX10GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v6
63; GFX10GISEL-NEXT:    v_mov_b32_e32 v7, v8
64; GFX10GISEL-NEXT:    v_lshl_or_b32 v6, v9, 16, v6
65; GFX10GISEL-NEXT:    image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
66; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
67; GFX10GISEL-NEXT:    ; return to shader part epilog
68main_body:
69  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
70  ret <4 x float> %v
71}
72
; 1D sample_c_d: a 32-bit %zcompare operand precedes the 32-bit derivatives and
; the 16-bit coordinate %s. Both runs expect a single image_sample_c_d with the
; a16 modifier and a four-register address v[0:3].
73define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) {
74; GFX10-LABEL: sample_c_d_1d:
75; GFX10:       ; %bb.0: ; %main_body
76; GFX10-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
77; GFX10-NEXT:    s_waitcnt vmcnt(0)
78; GFX10-NEXT:    ; return to shader part epilog
79;
80; GFX10GISEL-LABEL: sample_c_d_1d:
81; GFX10GISEL:       ; %bb.0: ; %main_body
82; GFX10GISEL-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
83; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
84; GFX10GISEL-NEXT:    ; return to shader part epilog
85main_body:
86  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
87  ret <4 x float> %v
88}
89
; 2D sample_c_d: %zcompare, four 32-bit derivatives and two 16-bit coordinates.
; Expected address is v[0:5]; v6 is merged into v5 first (one v_perm_b32 in the
; plain llc run, v_and_b32 + v_lshl_or_b32 in the -global-isel run).
90define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
91; GFX10-LABEL: sample_c_d_2d:
92; GFX10:       ; %bb.0: ; %main_body
93; GFX10-NEXT:    v_perm_b32 v5, v6, v5, 0x5040100
94; GFX10-NEXT:    image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
95; GFX10-NEXT:    s_waitcnt vmcnt(0)
96; GFX10-NEXT:    ; return to shader part epilog
97;
98; GFX10GISEL-LABEL: sample_c_d_2d:
99; GFX10GISEL:       ; %bb.0: ; %main_body
100; GFX10GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
101; GFX10GISEL-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
102; GFX10GISEL-NEXT:    image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
103; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
104; GFX10GISEL-NEXT:    ; return to shader part epilog
105main_body:
106  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
107  ret <4 x float> %v
108}
109
; 1D sample_d_cl: 32-bit derivatives with 16-bit %s and %clamp operands.
; Expected address is v[0:2]; v3 is merged into v2 first (one v_perm_b32 in the
; plain llc run, v_and_b32 + v_lshl_or_b32 in the -global-isel run).
110define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) {
111; GFX10-LABEL: sample_d_cl_1d:
112; GFX10:       ; %bb.0: ; %main_body
113; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
114; GFX10-NEXT:    image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
115; GFX10-NEXT:    s_waitcnt vmcnt(0)
116; GFX10-NEXT:    ; return to shader part epilog
117;
118; GFX10GISEL-LABEL: sample_d_cl_1d:
119; GFX10GISEL:       ; %bb.0: ; %main_body
120; GFX10GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
121; GFX10GISEL-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
122; GFX10GISEL-NEXT:    image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
123; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
124; GFX10GISEL-NEXT:    ; return to shader part epilog
125main_body:
126  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
127  ret <4 x float> %v
128}
129
; 2D sample_d_cl: four 32-bit derivatives with 16-bit %s, %t and %clamp.
; Both runs expect a six-register address with the a16 modifier. The plain llc
; run builds it in v[6:11] using five moves and a v_perm_b32; the -global-isel
; run builds it in v[0:5] using two moves around v_and_b32 + v_lshl_or_b32.
130define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
131; GFX10-LABEL: sample_d_cl_2d:
132; GFX10:       ; %bb.0: ; %main_body
133; GFX10-NEXT:    v_mov_b32_e32 v11, v6
134; GFX10-NEXT:    v_mov_b32_e32 v9, v3
135; GFX10-NEXT:    v_mov_b32_e32 v8, v2
136; GFX10-NEXT:    v_mov_b32_e32 v7, v1
137; GFX10-NEXT:    v_mov_b32_e32 v6, v0
138; GFX10-NEXT:    v_perm_b32 v10, v5, v4, 0x5040100
139; GFX10-NEXT:    image_sample_d_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
140; GFX10-NEXT:    s_waitcnt vmcnt(0)
141; GFX10-NEXT:    ; return to shader part epilog
142;
143; GFX10GISEL-LABEL: sample_d_cl_2d:
144; GFX10GISEL:       ; %bb.0: ; %main_body
145; GFX10GISEL-NEXT:    v_mov_b32_e32 v7, v5
146; GFX10GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
147; GFX10GISEL-NEXT:    v_mov_b32_e32 v5, v6
148; GFX10GISEL-NEXT:    v_lshl_or_b32 v4, v7, 16, v4
149; GFX10GISEL-NEXT:    image_sample_d_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
150; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
151; GFX10GISEL-NEXT:    ; return to shader part epilog
152main_body:
153  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
154  ret <4 x float> %v
155}
156
; 1D sample_c_d_cl: %zcompare, 32-bit derivatives, then 16-bit %s and %clamp.
; Expected address is v[0:3]; v4 is merged into v3 first (one v_perm_b32 in the
; plain llc run, v_and_b32 + v_lshl_or_b32 in the -global-isel run).
157define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) {
158; GFX10-LABEL: sample_c_d_cl_1d:
159; GFX10:       ; %bb.0: ; %main_body
160; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
161; GFX10-NEXT:    image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
162; GFX10-NEXT:    s_waitcnt vmcnt(0)
163; GFX10-NEXT:    ; return to shader part epilog
164;
165; GFX10GISEL-LABEL: sample_c_d_cl_1d:
166; GFX10GISEL:       ; %bb.0: ; %main_body
167; GFX10GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
168; GFX10GISEL-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
169; GFX10GISEL-NEXT:    image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
170; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
171; GFX10GISEL-NEXT:    ; return to shader part epilog
172main_body:
173  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
174  ret <4 x float> %v
175}
176
; 2D sample_c_d_cl: %zcompare, four 32-bit derivatives, 16-bit %s, %t, %clamp.
; Both runs expect a seven-register address with the a16 modifier. The plain
; llc run builds it in v[7:13] using six moves and a v_perm_b32; the
; -global-isel run builds it in v[0:6] using two moves around
; v_and_b32 + v_lshl_or_b32.
177define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
178; GFX10-LABEL: sample_c_d_cl_2d:
179; GFX10:       ; %bb.0: ; %main_body
180; GFX10-NEXT:    v_mov_b32_e32 v13, v7
181; GFX10-NEXT:    v_mov_b32_e32 v11, v4
182; GFX10-NEXT:    v_mov_b32_e32 v10, v3
183; GFX10-NEXT:    v_mov_b32_e32 v9, v2
184; GFX10-NEXT:    v_mov_b32_e32 v8, v1
185; GFX10-NEXT:    v_mov_b32_e32 v7, v0
186; GFX10-NEXT:    v_perm_b32 v12, v6, v5, 0x5040100
187; GFX10-NEXT:    image_sample_c_d_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
188; GFX10-NEXT:    s_waitcnt vmcnt(0)
189; GFX10-NEXT:    ; return to shader part epilog
190;
191; GFX10GISEL-LABEL: sample_c_d_cl_2d:
192; GFX10GISEL:       ; %bb.0: ; %main_body
193; GFX10GISEL-NEXT:    v_mov_b32_e32 v8, v6
194; GFX10GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
195; GFX10GISEL-NEXT:    v_mov_b32_e32 v6, v7
196; GFX10GISEL-NEXT:    v_lshl_or_b32 v5, v8, 16, v5
197; GFX10GISEL-NEXT:    image_sample_c_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
198; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
199; GFX10GISEL-NEXT:    ; return to shader part epilog
200main_body:
201  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
202  ret <4 x float> %v
203}
204
; 1D sample_cd with 32-bit derivatives (%dsdh, %dsdv) and a 16-bit coordinate.
; Both runs expect a single image_sample_cd carrying the a16 modifier with a
; three-register address v[0:2].
205define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) {
206; GFX10-LABEL: sample_cd_1d:
207; GFX10:       ; %bb.0: ; %main_body
208; GFX10-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
209; GFX10-NEXT:    s_waitcnt vmcnt(0)
210; GFX10-NEXT:    ; return to shader part epilog
211;
212; GFX10GISEL-LABEL: sample_cd_1d:
213; GFX10GISEL:       ; %bb.0: ; %main_body
214; GFX10GISEL-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
215; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
216; GFX10GISEL-NEXT:    ; return to shader part epilog
217main_body:
218  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
219  ret <4 x float> %v
220}
221
; 2D sample_cd: four 32-bit derivatives plus two 16-bit coordinates (%s, %t).
; Expected address is v[0:4]; v5 is merged into v4 first (one v_perm_b32 in the
; plain llc run, v_and_b32 + v_lshl_or_b32 in the -global-isel run).
222define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
223; GFX10-LABEL: sample_cd_2d:
224; GFX10:       ; %bb.0: ; %main_body
225; GFX10-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
226; GFX10-NEXT:    image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
227; GFX10-NEXT:    s_waitcnt vmcnt(0)
228; GFX10-NEXT:    ; return to shader part epilog
229;
230; GFX10GISEL-LABEL: sample_cd_2d:
231; GFX10GISEL:       ; %bb.0: ; %main_body
232; GFX10GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
233; GFX10GISEL-NEXT:    v_lshl_or_b32 v4, v5, 16, v4
234; GFX10GISEL-NEXT:    image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
235; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
236; GFX10GISEL-NEXT:    ; return to shader part epilog
237main_body:
238  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
239  ret <4 x float> %v
240}
241
; 1D sample_c_cd: a 32-bit %zcompare operand precedes the 32-bit derivatives
; and the 16-bit coordinate %s. Both runs expect a single image_sample_c_cd
; with the a16 modifier and a four-register address v[0:3].
242define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) {
243; GFX10-LABEL: sample_c_cd_1d:
244; GFX10:       ; %bb.0: ; %main_body
245; GFX10-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
246; GFX10-NEXT:    s_waitcnt vmcnt(0)
247; GFX10-NEXT:    ; return to shader part epilog
248;
249; GFX10GISEL-LABEL: sample_c_cd_1d:
250; GFX10GISEL:       ; %bb.0: ; %main_body
251; GFX10GISEL-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
252; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
253; GFX10GISEL-NEXT:    ; return to shader part epilog
254main_body:
255  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
256  ret <4 x float> %v
257}
258
; 2D sample_c_cd: %zcompare, four 32-bit derivatives and two 16-bit coordinates.
; Expected address is v[0:5]; v6 is merged into v5 first (one v_perm_b32 in the
; plain llc run, v_and_b32 + v_lshl_or_b32 in the -global-isel run).
259define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
260; GFX10-LABEL: sample_c_cd_2d:
261; GFX10:       ; %bb.0: ; %main_body
262; GFX10-NEXT:    v_perm_b32 v5, v6, v5, 0x5040100
263; GFX10-NEXT:    image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
264; GFX10-NEXT:    s_waitcnt vmcnt(0)
265; GFX10-NEXT:    ; return to shader part epilog
266;
267; GFX10GISEL-LABEL: sample_c_cd_2d:
268; GFX10GISEL:       ; %bb.0: ; %main_body
269; GFX10GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
270; GFX10GISEL-NEXT:    v_lshl_or_b32 v5, v6, 16, v5
271; GFX10GISEL-NEXT:    image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
272; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
273; GFX10GISEL-NEXT:    ; return to shader part epilog
274main_body:
275  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
276  ret <4 x float> %v
277}
278
; 1D sample_cd_cl: 32-bit derivatives with 16-bit %s and %clamp operands.
; Expected address is v[0:2]; v3 is merged into v2 first (one v_perm_b32 in the
; plain llc run, v_and_b32 + v_lshl_or_b32 in the -global-isel run).
279define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) {
280; GFX10-LABEL: sample_cd_cl_1d:
281; GFX10:       ; %bb.0: ; %main_body
282; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
283; GFX10-NEXT:    image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
284; GFX10-NEXT:    s_waitcnt vmcnt(0)
285; GFX10-NEXT:    ; return to shader part epilog
286;
287; GFX10GISEL-LABEL: sample_cd_cl_1d:
288; GFX10GISEL:       ; %bb.0: ; %main_body
289; GFX10GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
290; GFX10GISEL-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
291; GFX10GISEL-NEXT:    image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
292; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
293; GFX10GISEL-NEXT:    ; return to shader part epilog
294main_body:
295  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
296  ret <4 x float> %v
297}
298
; 2D sample_cd_cl: four 32-bit derivatives with 16-bit %s, %t and %clamp.
; Both runs expect a six-register address with the a16 modifier. The plain llc
; run builds it in v[6:11] using five moves and a v_perm_b32; the -global-isel
; run builds it in v[0:5] using two moves around v_and_b32 + v_lshl_or_b32.
299define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
300; GFX10-LABEL: sample_cd_cl_2d:
301; GFX10:       ; %bb.0: ; %main_body
302; GFX10-NEXT:    v_mov_b32_e32 v11, v6
303; GFX10-NEXT:    v_mov_b32_e32 v9, v3
304; GFX10-NEXT:    v_mov_b32_e32 v8, v2
305; GFX10-NEXT:    v_mov_b32_e32 v7, v1
306; GFX10-NEXT:    v_mov_b32_e32 v6, v0
307; GFX10-NEXT:    v_perm_b32 v10, v5, v4, 0x5040100
308; GFX10-NEXT:    image_sample_cd_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
309; GFX10-NEXT:    s_waitcnt vmcnt(0)
310; GFX10-NEXT:    ; return to shader part epilog
311;
312; GFX10GISEL-LABEL: sample_cd_cl_2d:
313; GFX10GISEL:       ; %bb.0: ; %main_body
314; GFX10GISEL-NEXT:    v_mov_b32_e32 v7, v5
315; GFX10GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
316; GFX10GISEL-NEXT:    v_mov_b32_e32 v5, v6
317; GFX10GISEL-NEXT:    v_lshl_or_b32 v4, v7, 16, v4
318; GFX10GISEL-NEXT:    image_sample_cd_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
319; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
320; GFX10GISEL-NEXT:    ; return to shader part epilog
321main_body:
322  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
323  ret <4 x float> %v
324}
325
; 1D sample_c_cd_cl: %zcompare, 32-bit derivatives, then 16-bit %s and %clamp.
; Expected address is v[0:3]; v4 is merged into v3 first (one v_perm_b32 in the
; plain llc run, v_and_b32 + v_lshl_or_b32 in the -global-isel run).
326define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) {
327; GFX10-LABEL: sample_c_cd_cl_1d:
328; GFX10:       ; %bb.0: ; %main_body
329; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
330; GFX10-NEXT:    image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
331; GFX10-NEXT:    s_waitcnt vmcnt(0)
332; GFX10-NEXT:    ; return to shader part epilog
333;
334; GFX10GISEL-LABEL: sample_c_cd_cl_1d:
335; GFX10GISEL:       ; %bb.0: ; %main_body
336; GFX10GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
337; GFX10GISEL-NEXT:    v_lshl_or_b32 v3, v4, 16, v3
338; GFX10GISEL-NEXT:    image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
339; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
340; GFX10GISEL-NEXT:    ; return to shader part epilog
341main_body:
342  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
343  ret <4 x float> %v
344}
345
; 2D sample_c_cd_cl: %zcompare, four 32-bit derivatives, 16-bit %s, %t, %clamp.
; Both runs expect a seven-register address with the a16 modifier. The plain
; llc run builds it in v[7:13] using six moves and a v_perm_b32; the
; -global-isel run builds it in v[0:6] using two moves around
; v_and_b32 + v_lshl_or_b32.
346define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
347; GFX10-LABEL: sample_c_cd_cl_2d:
348; GFX10:       ; %bb.0: ; %main_body
349; GFX10-NEXT:    v_mov_b32_e32 v13, v7
350; GFX10-NEXT:    v_mov_b32_e32 v11, v4
351; GFX10-NEXT:    v_mov_b32_e32 v10, v3
352; GFX10-NEXT:    v_mov_b32_e32 v9, v2
353; GFX10-NEXT:    v_mov_b32_e32 v8, v1
354; GFX10-NEXT:    v_mov_b32_e32 v7, v0
355; GFX10-NEXT:    v_perm_b32 v12, v6, v5, 0x5040100
356; GFX10-NEXT:    image_sample_c_cd_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
357; GFX10-NEXT:    s_waitcnt vmcnt(0)
358; GFX10-NEXT:    ; return to shader part epilog
359;
360; GFX10GISEL-LABEL: sample_c_cd_cl_2d:
361; GFX10GISEL:       ; %bb.0: ; %main_body
362; GFX10GISEL-NEXT:    v_mov_b32_e32 v8, v6
363; GFX10GISEL-NEXT:    v_and_b32_e32 v5, 0xffff, v5
364; GFX10GISEL-NEXT:    v_mov_b32_e32 v6, v7
365; GFX10GISEL-NEXT:    v_lshl_or_b32 v5, v8, 16, v5
366; GFX10GISEL-NEXT:    image_sample_c_cd_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
367; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
368; GFX10GISEL-NEXT:    ; return to shader part epilog
369main_body:
370  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
371  ret <4 x float> %v
372}
373
; 2D-array sample_c_d_o returning a single float: i32 %offset, %zcompare, four
; 32-bit derivatives and 16-bit %s, %t, %slice. The intrinsic is called with a
; dmask argument of 4, and the checks expect dmask:0x4 with the scalar result
; in v0. Both runs expect an eight-register address with the a16 modifier
; (v[8:15] for the plain llc run, v[0:7] for the -global-isel run).
374define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
375; GFX10-LABEL: sample_c_d_o_2darray_V1:
376; GFX10:       ; %bb.0: ; %main_body
377; GFX10-NEXT:    v_mov_b32_e32 v15, v8
378; GFX10-NEXT:    v_mov_b32_e32 v13, v5
379; GFX10-NEXT:    v_mov_b32_e32 v12, v4
380; GFX10-NEXT:    v_mov_b32_e32 v11, v3
381; GFX10-NEXT:    v_mov_b32_e32 v10, v2
382; GFX10-NEXT:    v_mov_b32_e32 v9, v1
383; GFX10-NEXT:    v_mov_b32_e32 v8, v0
384; GFX10-NEXT:    v_perm_b32 v14, v7, v6, 0x5040100
385; GFX10-NEXT:    image_sample_c_d_o v0, v[8:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
386; GFX10-NEXT:    s_waitcnt vmcnt(0)
387; GFX10-NEXT:    ; return to shader part epilog
388;
389; GFX10GISEL-LABEL: sample_c_d_o_2darray_V1:
390; GFX10GISEL:       ; %bb.0: ; %main_body
391; GFX10GISEL-NEXT:    v_mov_b32_e32 v9, v7
392; GFX10GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v6
393; GFX10GISEL-NEXT:    v_mov_b32_e32 v7, v8
394; GFX10GISEL-NEXT:    v_lshl_or_b32 v6, v9, 16, v6
395; GFX10GISEL-NEXT:    image_sample_c_d_o v0, v[0:7], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
396; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
397; GFX10GISEL-NEXT:    ; return to shader part epilog
398main_body:
399  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
400  ret float %v
401}
402
; Two-channel variant of the 2D-array sample_c_d_o test: same operands, but the
; intrinsic returns <2 x float> and is called with a dmask argument of 6. The
; checks expect dmask:0x6 with the result in v[0:1]; the address setup matches
; the single-channel test (v[8:15] plain llc, v[0:7] -global-isel).
403define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
404; GFX10-LABEL: sample_c_d_o_2darray_V2:
405; GFX10:       ; %bb.0: ; %main_body
406; GFX10-NEXT:    v_mov_b32_e32 v15, v8
407; GFX10-NEXT:    v_mov_b32_e32 v13, v5
408; GFX10-NEXT:    v_mov_b32_e32 v12, v4
409; GFX10-NEXT:    v_mov_b32_e32 v11, v3
410; GFX10-NEXT:    v_mov_b32_e32 v10, v2
411; GFX10-NEXT:    v_mov_b32_e32 v9, v1
412; GFX10-NEXT:    v_mov_b32_e32 v8, v0
413; GFX10-NEXT:    v_perm_b32 v14, v7, v6, 0x5040100
414; GFX10-NEXT:    image_sample_c_d_o v[0:1], v[8:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
415; GFX10-NEXT:    s_waitcnt vmcnt(0)
416; GFX10-NEXT:    ; return to shader part epilog
417;
418; GFX10GISEL-LABEL: sample_c_d_o_2darray_V2:
419; GFX10GISEL:       ; %bb.0: ; %main_body
420; GFX10GISEL-NEXT:    v_mov_b32_e32 v9, v7
421; GFX10GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v6
422; GFX10GISEL-NEXT:    v_mov_b32_e32 v7, v8
423; GFX10GISEL-NEXT:    v_lshl_or_b32 v6, v9, 16, v6
424; GFX10GISEL-NEXT:    image_sample_c_d_o v[0:1], v[0:7], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
425; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
426; GFX10GISEL-NEXT:    ; return to shader part epilog
427main_body:
428  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
429  ret <2 x float> %v
430}
431
; Declarations of the intrinsic overloads called by the tests above. All use the
; .f32.f16 suffix: float derivative operands and half coordinate operands.
; Attribute group #1 is not defined in this part of the file.
; sample_d variants (plain, _c, _cl, _c_cl).
432declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
433declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
434declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32, float, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
435declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
436declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
437declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
438declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
439declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
440declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
441
; sample_cd variants (plain, _c, _cl, _c_cl).
442declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
443declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
444declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
445declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
446declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
447declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
448declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
449declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
450
; sample_c_d_o on 2D arrays, in float and <2 x float> return flavours.
451declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
452declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
453
; 1D sample_d with the operand widths swapped relative to the tests above:
; 16-bit derivatives (%dsdh, %dsdv) and a 32-bit coordinate (%s).
; Both runs expect image_sample_d_g16 with address v[0:2] and no a16 modifier,
; with no extra instructions in front of it.
454define amdgpu_ps <4 x float> @sample_g16_noa16_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
455; GFX10-LABEL: sample_g16_noa16_d_1d:
456; GFX10:       ; %bb.0: ; %main_body
457; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
458; GFX10-NEXT:    s_waitcnt vmcnt(0)
459; GFX10-NEXT:    ; return to shader part epilog
460;
461; GFX10GISEL-LABEL: sample_g16_noa16_d_1d:
462; GFX10GISEL:       ; %bb.0: ; %main_body
463; GFX10GISEL-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
464; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
465; GFX10GISEL-NEXT:    ; return to shader part epilog
466main_body:
467  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
468  ret <4 x float> %v
469}
470
; 2D sample_d with four 16-bit derivatives and two 32-bit coordinates (%s, %t).
; Both runs expect two register merges followed by image_sample_d_g16 whose
; address is an explicit register list instead of a range: [v0, v2, v4, v5] in
; the plain llc run (two v_perm_b32) and [v0, v1, v4, v5] in the -global-isel
; run (two v_and_b32 + v_lshl_or_b32 pairs). No a16 modifier is expected.
471define amdgpu_ps <4 x float> @sample_g16_noa16_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
472; GFX10-LABEL: sample_g16_noa16_d_2d:
473; GFX10:       ; %bb.0: ; %main_body
474; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
475; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
476; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
477; GFX10-NEXT:    s_waitcnt vmcnt(0)
478; GFX10-NEXT:    ; return to shader part epilog
479;
480; GFX10GISEL-LABEL: sample_g16_noa16_d_2d:
481; GFX10GISEL:       ; %bb.0: ; %main_body
482; GFX10GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
483; GFX10GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
484; GFX10GISEL-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
485; GFX10GISEL-NEXT:    v_lshl_or_b32 v1, v3, 16, v2
486; GFX10GISEL-NEXT:    image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
487; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
488; GFX10GISEL-NEXT:    ; return to shader part epilog
489main_body:
490  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
491  ret <4 x float> %v
492}
493
; 3D sample_d with six 16-bit derivatives and three 32-bit coordinates.
; Both runs expect image_sample_d_g16 with a contiguous seven-register address
; v[2:8] and no a16 modifier. Each run first shuffles v2/v3 with two moves and
; merges two register pairs (v_perm_b32 in the plain llc run,
; v_and_b32 + v_lshl_or_b32 in the -global-isel run).
494define amdgpu_ps <4 x float> @sample_g16_noa16_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
495; GFX10-LABEL: sample_g16_noa16_d_3d:
496; GFX10:       ; %bb.0: ; %main_body
497; GFX10-NEXT:    v_mov_b32_e32 v9, v3
498; GFX10-NEXT:    v_mov_b32_e32 v3, v2
499; GFX10-NEXT:    v_perm_b32 v2, v1, v0, 0x5040100
500; GFX10-NEXT:    v_perm_b32 v4, v4, v9, 0x5040100
501; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
502; GFX10-NEXT:    s_waitcnt vmcnt(0)
503; GFX10-NEXT:    ; return to shader part epilog
504;
505; GFX10GISEL-LABEL: sample_g16_noa16_d_3d:
506; GFX10GISEL:       ; %bb.0: ; %main_body
507; GFX10GISEL-NEXT:    v_mov_b32_e32 v9, v3
508; GFX10GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
509; GFX10GISEL-NEXT:    v_mov_b32_e32 v3, v2
510; GFX10GISEL-NEXT:    v_and_b32_e32 v9, 0xffff, v9
511; GFX10GISEL-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
512; GFX10GISEL-NEXT:    v_lshl_or_b32 v4, v4, 16, v9
513; GFX10GISEL-NEXT:    image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
514; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
515; GFX10GISEL-NEXT:    ; return to shader part epilog
516main_body:
517  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
518  ret <4 x float> %v
519}
520
; 1D sample_c_d with a 32-bit %zcompare, 16-bit derivatives and a 32-bit
; coordinate. Both runs expect a single image_sample_c_d_g16 with address
; v[0:3] and no a16 modifier.
521define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
522; GFX10-LABEL: sample_g16_noa16_c_d_1d:
523; GFX10:       ; %bb.0: ; %main_body
524; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
525; GFX10-NEXT:    s_waitcnt vmcnt(0)
526; GFX10-NEXT:    ; return to shader part epilog
527;
528; GFX10GISEL-LABEL: sample_g16_noa16_c_d_1d:
529; GFX10GISEL:       ; %bb.0: ; %main_body
530; GFX10GISEL-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
531; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
532; GFX10GISEL-NEXT:    ; return to shader part epilog
533main_body:
534  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
535  ret <4 x float> %v
536}
537
; sample.c.d on a 2D image: four half derivative operands, float %zcompare and
; float coordinates (%s, %t). The expected code first merges the derivatives
; pairwise into two registers (v_perm_b32 in the default selector, v_and_b32 +
; v_lshl_or_b32 under -global-isel) and then issues image_sample_c_d_g16 with
; a five-entry register list and no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_g16_noa16_c_d_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_2d:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX10GISEL-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10GISEL-NEXT:    v_lshl_or_b32 v2, v4, 16, v3
; GFX10GISEL-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
560
; sample.d.cl on a 1D image: half derivative operands (%dsdh, %dsdv) followed
; by float %s and float %clamp. Both llc invocations are expected to emit a
; single image_sample_d_cl_g16 over v[0:3] with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_1d:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
577
; sample.d.cl on a 2D image: four half derivative operands, then float %s, %t
; and %clamp. The expected code merges the derivatives pairwise into two
; registers (v_perm_b32, or v_and_b32 + v_lshl_or_b32 under -global-isel) and
; passes a five-entry register list to image_sample_d_cl_g16 (no a16).
define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_2d:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10GISEL-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10GISEL-NEXT:    v_lshl_or_b32 v1, v3, 16, v2
; GFX10GISEL-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
600
; sample.c.d.cl on a 1D image: float %zcompare, half derivative operands
; (%dsdh, %dsdv), then float %s and %clamp. Both llc invocations are expected
; to emit a single image_sample_c_d_cl_g16 over v[0:4] with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_c_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_1d:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
617
; sample.c.d.cl on a 2D image: float %zcompare, four half derivative operands,
; then float %s, %t and %clamp. Unlike the smaller 2D cases, the expected
; instruction takes a contiguous range v[2:7], so the assertions also contain
; v_mov_b32 copies next to the pairwise derivative merge (v_perm_b32, or
; v_and_b32 + v_lshl_or_b32 under -global-isel). No a16 modifier is expected.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_c_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v8, v2
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_perm_b32 v4, v4, v3, 0x5040100
; GFX10-NEXT:    v_perm_b32 v3, v8, v1, 0x5040100
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_2d:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    v_mov_b32_e32 v8, v2
; GFX10GISEL-NEXT:    v_mov_b32_e32 v2, v0
; GFX10GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v1
; GFX10GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v3
; GFX10GISEL-NEXT:    v_lshl_or_b32 v3, v8, 16, v0
; GFX10GISEL-NEXT:    v_lshl_or_b32 v4, v4, 16, v1
; GFX10GISEL-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
644
; sample.cd on a 1D image: half derivative operands (%dsdh, %dsdv) and a float
; coordinate %s. Both llc invocations are expected to emit a single
; image_sample_cd_g16 over v[0:2] with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_g16_noa16_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_cd_1d:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
661
; sample.cd on a 2D image: four half derivative operands and float coordinates
; (%s, %t). The expected code merges the derivatives pairwise into two
; registers (v_perm_b32, or v_and_b32 + v_lshl_or_b32 under -global-isel) and
; passes a four-entry register list to image_sample_cd_g16 (no a16).
define amdgpu_ps <4 x float> @sample_g16_noa16_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_g16_noa16_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_cd_2d:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10GISEL-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10GISEL-NEXT:    v_lshl_or_b32 v1, v3, 16, v2
; GFX10GISEL-NEXT:    image_sample_cd_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
684
; sample.c.cd on a 1D image: float %zcompare, half derivative operands
; (%dsdh, %dsdv) and a float coordinate %s. Both llc invocations are expected
; to emit a single image_sample_c_cd_g16 over v[0:3] with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_g16_noa16_c_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_1d:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
701
; sample.c.cd on a 2D image: float %zcompare, four half derivative operands
; and float coordinates (%s, %t). The expected code merges the derivatives
; pairwise into two registers (v_perm_b32, or v_and_b32 + v_lshl_or_b32 under
; -global-isel) and passes a five-entry register list to
; image_sample_c_cd_g16 (no a16).
define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_g16_noa16_c_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_2d:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10GISEL-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX10GISEL-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10GISEL-NEXT:    v_lshl_or_b32 v2, v4, 16, v3
; GFX10GISEL-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
724
; sample.cd.cl on a 1D image: half derivative operands (%dsdh, %dsdv), then
; float %s and %clamp. Both llc invocations are expected to emit a single
; image_sample_cd_cl_g16 over v[0:3] with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_1d:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
741
; sample.cd.cl on a 2D image: four half derivative operands, then float %s, %t
; and %clamp. The expected code merges the derivatives pairwise into two
; registers (v_perm_b32, or v_and_b32 + v_lshl_or_b32 under -global-isel) and
; passes a five-entry register list to image_sample_cd_cl_g16 (no a16).
define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_2d:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10GISEL-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10GISEL-NEXT:    v_lshl_or_b32 v1, v3, 16, v2
; GFX10GISEL-NEXT:    image_sample_cd_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
764
; sample.c.cd.cl on a 1D image: float %zcompare, half derivative operands
; (%dsdh, %dsdv), then float %s and %clamp. Both llc invocations are expected
; to emit a single image_sample_c_cd_cl_g16 over v[0:4] with no a16 modifier.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_c_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_1d:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
781
; sample.c.cd.cl on a 2D image: float %zcompare, four half derivative
; operands, then float %s, %t and %clamp. The expected instruction takes a
; contiguous range v[2:7], so the assertions contain v_mov_b32 copies next to
; the pairwise derivative merge (v_perm_b32, or v_and_b32 + v_lshl_or_b32
; under -global-isel). No a16 modifier is expected.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_g16_noa16_c_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v8, v2
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_perm_b32 v4, v4, v3, 0x5040100
; GFX10-NEXT:    v_perm_b32 v3, v8, v1, 0x5040100
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_2d:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    v_mov_b32_e32 v8, v2
; GFX10GISEL-NEXT:    v_mov_b32_e32 v2, v0
; GFX10GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v1
; GFX10GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v3
; GFX10GISEL-NEXT:    v_lshl_or_b32 v3, v8, 16, v0
; GFX10GISEL-NEXT:    v_lshl_or_b32 v4, v4, 16, v1
; GFX10GISEL-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
808
; sample.c.d.o on a 2D-array image returning a single float: i32 %offset,
; float %zcompare, four half derivative operands, then float %s, %t and
; %slice. The intrinsic is called with dmask 4, and the assertions expect a
; one-register destination (v0) with dmask:0x4. The address is the contiguous
; range v[2:8], reached through v_mov_b32 copies plus the pairwise derivative
; merge (v_perm_b32, or v_and_b32 + v_lshl_or_b32 under -global-isel).
define amdgpu_ps float @sample_g16_noa16_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V1:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v9, v3
; GFX10-NEXT:    v_mov_b32_e32 v10, v2
; GFX10-NEXT:    v_mov_b32_e32 v3, v1
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_perm_b32 v5, v5, v4, 0x5040100
; GFX10-NEXT:    v_perm_b32 v4, v9, v10, 0x5040100
; GFX10-NEXT:    image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V1:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    v_mov_b32_e32 v9, v2
; GFX10GISEL-NEXT:    v_mov_b32_e32 v10, v3
; GFX10GISEL-NEXT:    v_mov_b32_e32 v2, v0
; GFX10GISEL-NEXT:    v_mov_b32_e32 v3, v1
; GFX10GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v4
; GFX10GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v9
; GFX10GISEL-NEXT:    v_lshl_or_b32 v5, v5, 16, v1
; GFX10GISEL-NEXT:    v_lshl_or_b32 v4, v10, 16, v0
; GFX10GISEL-NEXT:    image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret float %v
}
839
; Two-component variant of the sample.c.d.o 2D-array case: same operand list
; (i32 %offset, float %zcompare, four half derivative operands, float %s, %t,
; %slice), but the intrinsic returns <2 x float> and is called with dmask 6.
; The assertions expect a two-register destination v[0:1] with dmask:0x6 and
; the contiguous address range v[2:8].
define amdgpu_ps <2 x float> @sample_g16_noa16_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V2:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v9, v3
; GFX10-NEXT:    v_mov_b32_e32 v10, v2
; GFX10-NEXT:    v_mov_b32_e32 v3, v1
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_perm_b32 v5, v5, v4, 0x5040100
; GFX10-NEXT:    v_perm_b32 v4, v9, v10, 0x5040100
; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V2:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    v_mov_b32_e32 v9, v2
; GFX10GISEL-NEXT:    v_mov_b32_e32 v10, v3
; GFX10GISEL-NEXT:    v_mov_b32_e32 v2, v0
; GFX10GISEL-NEXT:    v_mov_b32_e32 v3, v1
; GFX10GISEL-NEXT:    v_and_b32_e32 v1, 0xffff, v4
; GFX10GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v9
; GFX10GISEL-NEXT:    v_lshl_or_b32 v5, v5, 16, v1
; GFX10GISEL-NEXT:    v_lshl_or_b32 v4, v10, 16, v0
; GFX10GISEL-NEXT:    image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <2 x float> %v
}
870
; Declarations of the intrinsics with half derivative operands and float
; coordinate operands. In each name the trailing suffix lists the overloaded
; types as they appear in the signature: the return type (v4f32 / v2f32 /
; f32), then f16 for the derivative operands, then f32 for the remaining
; float address operands. All of them reference attribute group #1.

; sample.d / sample.c.d and their .cl forms.
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.cd / sample.c.cd and their .cl forms.
declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; sample.c.d.o on 2D-array images, with scalar and two-component results.
declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
892
; sample.d on a 1D image where every address operand is half: the derivative
; operands (%dsdh, %dsdv) and the coordinate %s. Both llc invocations are
; expected to emit a single image_sample_d_g16 over v[0:2] that also carries
; the a16 modifier.
define amdgpu_ps <4 x float> @sample_d_1d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
; GFX10-LABEL: sample_d_1d_g16_a16:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_d_1d_g16_a16:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
909
; sample.d on a 2D image where every address operand is half: four derivative
; operands plus coordinates (%s, %t). The expected code merges all six halves
; pairwise into three registers (v_perm_b32, or v_and_b32 + v_lshl_or_b32
; under -global-isel) and issues image_sample_d_g16 with the a16 modifier on a
; three-register contiguous address range.
define amdgpu_ps <4 x float> @sample_d_2d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GFX10-LABEL: sample_d_2d_g16_a16:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
; GFX10-NEXT:    v_perm_b32 v3, v3, v2, 0x5040100
; GFX10-NEXT:    v_perm_b32 v2, v1, v0, 0x5040100
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_d_2d_g16_a16:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10GISEL-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; GFX10GISEL-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10GISEL-NEXT:    v_lshl_or_b32 v1, v3, 16, v2
; GFX10GISEL-NEXT:    v_lshl_or_b32 v2, v5, 16, v4
; GFX10GISEL-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
935
; sample.d on a 3D image where every address operand is half: six derivative
; operands plus coordinates (%s, %t, %r). The assertions expect
; image_sample_d_g16 with the a16 modifier on a six-register contiguous
; address range (v[7:12] in the default selector, v[2:7] under -global-isel),
; built from v_mov_b32 copies and pairwise merges (v_perm_b32, or v_and_b32 +
; v_lshl_or_b32).
define amdgpu_ps <4 x float> @sample_d_3d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) {
; GFX10-LABEL: sample_d_3d_g16_a16:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v12, v8
; GFX10-NEXT:    v_mov_b32_e32 v10, v5
; GFX10-NEXT:    v_mov_b32_e32 v8, v2
; GFX10-NEXT:    v_perm_b32 v11, v7, v6, 0x5040100
; GFX10-NEXT:    v_perm_b32 v9, v4, v3, 0x5040100
; GFX10-NEXT:    v_perm_b32 v7, v1, v0, 0x5040100
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_d_3d_g16_a16:
; GFX10GISEL:       ; %bb.0: ; %main_body
; GFX10GISEL-NEXT:    v_mov_b32_e32 v9, v3
; GFX10GISEL-NEXT:    v_mov_b32_e32 v10, v7
; GFX10GISEL-NEXT:    v_mov_b32_e32 v7, v8
; GFX10GISEL-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT:    v_and_b32_e32 v6, 0xffff, v6
; GFX10GISEL-NEXT:    v_and_b32_e32 v8, 0xffff, v9
; GFX10GISEL-NEXT:    v_mov_b32_e32 v3, v2
; GFX10GISEL-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
; GFX10GISEL-NEXT:    v_lshl_or_b32 v6, v10, 16, v6
; GFX10GISEL-NEXT:    v_lshl_or_b32 v4, v4, 16, v8
; GFX10GISEL-NEXT:    image_sample_d_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}
968
; Declarations of the sample.d intrinsics whose derivative and coordinate
; operands are all half (name suffix .f16.f16). These three carry no
; attribute-group reference.
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32)

; Attribute groups. NOTE(review): only #1 is referenced in the part of the
; file reviewed here; #0 and #2 may be unused - confirm against the rest of
; the file before removing them.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }
976