xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll (revision 43c7eb5d7b237bc18385f0a5529f1e4b8bf4d6a3)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
6; RUN: llc -mtriple=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
7; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
8; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
9; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
10
; Plain 1D sample: dmask operand 15, second-to-last i32 operand 0. Each target
; is expected to switch exec into whole quad mode (s_wqm), mask it back with
; the saved copy, and issue one image_sample without tfe/lwe that writes the
; four result dwords to v[0:3].
11define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
12; VERDE-LABEL: sample_1d:
13; VERDE:       ; %bb.0: ; %main_body
14; VERDE-NEXT:    s_mov_b64 s[12:13], exec
15; VERDE-NEXT:    s_wqm_b64 exec, exec
16; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
17; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
18; VERDE-NEXT:    s_waitcnt vmcnt(0)
19; VERDE-NEXT:    ; return to shader part epilog
20;
21; GFX6789-LABEL: sample_1d:
22; GFX6789:       ; %bb.0: ; %main_body
23; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
24; GFX6789-NEXT:    s_wqm_b64 exec, exec
25; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
26; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf
27; GFX6789-NEXT:    s_waitcnt vmcnt(0)
28; GFX6789-NEXT:    ; return to shader part epilog
29;
30; GFX10PLUS-LABEL: sample_1d:
31; GFX10PLUS:       ; %bb.0: ; %main_body
32; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
33; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
34; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
35; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
36; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
37; GFX10PLUS-NEXT:    ; return to shader part epilog
38;
39; GFX12-LABEL: sample_1d:
40; GFX12:       ; %bb.0: ; %main_body
41; GFX12-NEXT:    s_mov_b32 s12, exec_lo
42; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
43; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
44; GFX12-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
45; GFX12-NEXT:    s_wait_samplecnt 0x0
46; GFX12-NEXT:    ; return to shader part epilog
47main_body:
48  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
49  ret <4 x float> %v
50}
51
; Same 1D sample, but with the second-to-last i32 operand set to 1 and the
; {<4 x float>, i32} return form. The expected image_sample carries the tfe
; modifier and writes five dwords, v[0:4], which are zeroed before the sample.
; The fifth dword (v4, the i32 struct member) is stored to %out and the four
; data dwords are returned.
52define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
53; VERDE-LABEL: sample_1d_tfe:
54; VERDE:       ; %bb.0: ; %main_body
55; VERDE-NEXT:    s_mov_b64 s[14:15], exec
56; VERDE-NEXT:    s_wqm_b64 exec, exec
57; VERDE-NEXT:    v_mov_b32_e32 v5, v0
58; VERDE-NEXT:    v_mov_b32_e32 v0, 0
59; VERDE-NEXT:    v_mov_b32_e32 v1, v0
60; VERDE-NEXT:    v_mov_b32_e32 v2, v0
61; VERDE-NEXT:    v_mov_b32_e32 v3, v0
62; VERDE-NEXT:    v_mov_b32_e32 v4, v0
63; VERDE-NEXT:    s_and_b64 exec, exec, s[14:15]
64; VERDE-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
65; VERDE-NEXT:    s_mov_b32 s15, 0xf000
66; VERDE-NEXT:    s_mov_b32 s14, -1
67; VERDE-NEXT:    s_waitcnt vmcnt(0)
68; VERDE-NEXT:    buffer_store_dword v4, off, s[12:15], 0
69; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
70; VERDE-NEXT:    ; return to shader part epilog
71;
72; GFX6789-LABEL: sample_1d_tfe:
73; GFX6789:       ; %bb.0: ; %main_body
74; GFX6789-NEXT:    s_mov_b64 s[14:15], exec
75; GFX6789-NEXT:    s_wqm_b64 exec, exec
76; GFX6789-NEXT:    v_mov_b32_e32 v6, 0
77; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
78; GFX6789-NEXT:    v_mov_b32_e32 v7, v6
79; GFX6789-NEXT:    v_mov_b32_e32 v8, v6
80; GFX6789-NEXT:    v_mov_b32_e32 v9, v6
81; GFX6789-NEXT:    v_mov_b32_e32 v10, v6
82; GFX6789-NEXT:    v_mov_b32_e32 v0, v6
83; GFX6789-NEXT:    v_mov_b32_e32 v1, v7
84; GFX6789-NEXT:    v_mov_b32_e32 v2, v8
85; GFX6789-NEXT:    v_mov_b32_e32 v3, v9
86; GFX6789-NEXT:    v_mov_b32_e32 v4, v10
87; GFX6789-NEXT:    s_and_b64 exec, exec, s[14:15]
88; GFX6789-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe
89; GFX6789-NEXT:    s_waitcnt vmcnt(0)
90; GFX6789-NEXT:    global_store_dword v6, v4, s[12:13]
91; GFX6789-NEXT:    s_waitcnt vmcnt(0)
92; GFX6789-NEXT:    ; return to shader part epilog
93;
94; GFX10-LABEL: sample_1d_tfe:
95; GFX10:       ; %bb.0: ; %main_body
96; GFX10-NEXT:    s_mov_b32 s14, exec_lo
97; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
98; GFX10-NEXT:    v_mov_b32_e32 v6, 0
99; GFX10-NEXT:    v_mov_b32_e32 v5, v0
100; GFX10-NEXT:    v_mov_b32_e32 v7, v6
101; GFX10-NEXT:    v_mov_b32_e32 v8, v6
102; GFX10-NEXT:    v_mov_b32_e32 v9, v6
103; GFX10-NEXT:    v_mov_b32_e32 v10, v6
104; GFX10-NEXT:    v_mov_b32_e32 v0, v6
105; GFX10-NEXT:    v_mov_b32_e32 v1, v7
106; GFX10-NEXT:    v_mov_b32_e32 v2, v8
107; GFX10-NEXT:    v_mov_b32_e32 v3, v9
108; GFX10-NEXT:    v_mov_b32_e32 v4, v10
109; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
110; GFX10-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
111; GFX10-NEXT:    s_waitcnt vmcnt(0)
112; GFX10-NEXT:    global_store_dword v6, v4, s[12:13]
113; GFX10-NEXT:    ; return to shader part epilog
114;
115; GFX11-LABEL: sample_1d_tfe:
116; GFX11:       ; %bb.0: ; %main_body
117; GFX11-NEXT:    s_mov_b32 s14, exec_lo
118; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
119; GFX11-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
120; GFX11-NEXT:    v_mov_b32_e32 v7, v6
121; GFX11-NEXT:    v_mov_b32_e32 v8, v6
122; GFX11-NEXT:    v_mov_b32_e32 v9, v6
123; GFX11-NEXT:    v_mov_b32_e32 v10, v6
124; GFX11-NEXT:    v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
125; GFX11-NEXT:    v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9
126; GFX11-NEXT:    v_mov_b32_e32 v4, v10
127; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s14
128; GFX11-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
129; GFX11-NEXT:    s_waitcnt vmcnt(0)
130; GFX11-NEXT:    global_store_b32 v6, v4, s[12:13]
131; GFX11-NEXT:    ; return to shader part epilog
132;
133; GFX12-LABEL: sample_1d_tfe:
134; GFX12:       ; %bb.0: ; %main_body
135; GFX12-NEXT:    s_mov_b32 s14, exec_lo
136; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
137; GFX12-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
138; GFX12-NEXT:    v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
139; GFX12-NEXT:    v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
140; GFX12-NEXT:    v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
141; GFX12-NEXT:    v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9
142; GFX12-NEXT:    v_mov_b32_e32 v4, v10
143; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s14
144; GFX12-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe
145; GFX12-NEXT:    s_wait_samplecnt 0x0
146; GFX12-NEXT:    global_store_b32 v6, v4, s[12:13]
147; GFX12-NEXT:    ; return to shader part epilog
148main_body:
149  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
150  %v.vec = extractvalue {<4 x float>, i32} %v, 0
151  %v.err = extractvalue {<4 x float>, i32} %v, 1
152  store i32 %v.err, ptr addrspace(1) %out, align 4
153  ret <4 x float> %v.vec
154}
155
; TFE sample where only element 0 of the sampled vector and the i32 struct
; member are used. The requested dmask of 15 is expected to shrink to 0x1 and
; the result to two dwords, v[0:1], both zeroed before the sample. %out is
; not referenced by the body.
; The returned vector starts from poison (not the deprecated undef); both
; lanes are overwritten before the ret.
156define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
157; VERDE-LABEL: sample_1d_tfe_adjust_writemask_1:
158; VERDE:       ; %bb.0: ; %main_body
159; VERDE-NEXT:    s_mov_b64 s[12:13], exec
160; VERDE-NEXT:    s_wqm_b64 exec, exec
161; VERDE-NEXT:    v_mov_b32_e32 v2, v0
162; VERDE-NEXT:    v_mov_b32_e32 v0, 0
163; VERDE-NEXT:    v_mov_b32_e32 v1, v0
164; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
165; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
166; VERDE-NEXT:    s_waitcnt vmcnt(0)
167; VERDE-NEXT:    ; return to shader part epilog
168;
169; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_1:
170; GFX6789:       ; %bb.0: ; %main_body
171; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
172; GFX6789-NEXT:    s_wqm_b64 exec, exec
173; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
174; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
175; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
176; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
177; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe
178; GFX6789-NEXT:    s_waitcnt vmcnt(0)
179; GFX6789-NEXT:    ; return to shader part epilog
180;
181; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_1:
182; GFX10PLUS:       ; %bb.0: ; %main_body
183; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
184; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
185; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
186; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
187; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
188; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
189; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
190; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
191; GFX10PLUS-NEXT:    ; return to shader part epilog
192;
193; GFX12-LABEL: sample_1d_tfe_adjust_writemask_1:
194; GFX12:       ; %bb.0: ; %main_body
195; GFX12-NEXT:    s_mov_b32 s12, exec_lo
196; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
197; GFX12-NEXT:    v_mov_b32_e32 v2, v0
198; GFX12-NEXT:    v_mov_b32_e32 v0, 0
199; GFX12-NEXT:    v_mov_b32_e32 v1, v0
200; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
201; GFX12-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe
202; GFX12-NEXT:    s_wait_samplecnt 0x0
203; GFX12-NEXT:    ; return to shader part epilog
204main_body:
205  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
206  %res.vec = extractvalue {<4 x float>,i32} %v, 0
207  %res.f = extractelement <4 x float> %res.vec, i32 0
208  %res.err = extractvalue {<4 x float>,i32} %v, 1
209  %res.errf = bitcast i32 %res.err to float
210  %res.tmp = insertelement <2 x float> poison, float %res.f, i32 0
211  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
212  ret <2 x float> %res
213}
214
; TFE sample where only element 1 of the sampled vector and the i32 struct
; member are used. The requested dmask of 15 is expected to shrink to 0x2 and
; the result to two dwords, v[0:1], both zeroed before the sample.
; The returned vector starts from poison (not the deprecated undef); both
; lanes are overwritten before the ret.
215define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
216; VERDE-LABEL: sample_1d_tfe_adjust_writemask_2:
217; VERDE:       ; %bb.0: ; %main_body
218; VERDE-NEXT:    s_mov_b64 s[12:13], exec
219; VERDE-NEXT:    s_wqm_b64 exec, exec
220; VERDE-NEXT:    v_mov_b32_e32 v2, v0
221; VERDE-NEXT:    v_mov_b32_e32 v0, 0
222; VERDE-NEXT:    v_mov_b32_e32 v1, v0
223; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
224; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
225; VERDE-NEXT:    s_waitcnt vmcnt(0)
226; VERDE-NEXT:    ; return to shader part epilog
227;
228; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_2:
229; GFX6789:       ; %bb.0: ; %main_body
230; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
231; GFX6789-NEXT:    s_wqm_b64 exec, exec
232; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
233; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
234; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
235; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
236; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe
237; GFX6789-NEXT:    s_waitcnt vmcnt(0)
238; GFX6789-NEXT:    ; return to shader part epilog
239;
240; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_2:
241; GFX10PLUS:       ; %bb.0: ; %main_body
242; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
243; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
244; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
245; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
246; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
247; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
248; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe
249; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
250; GFX10PLUS-NEXT:    ; return to shader part epilog
251;
252; GFX12-LABEL: sample_1d_tfe_adjust_writemask_2:
253; GFX12:       ; %bb.0: ; %main_body
254; GFX12-NEXT:    s_mov_b32 s12, exec_lo
255; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
256; GFX12-NEXT:    v_mov_b32_e32 v2, v0
257; GFX12-NEXT:    v_mov_b32_e32 v0, 0
258; GFX12-NEXT:    v_mov_b32_e32 v1, v0
259; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
260; GFX12-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe
261; GFX12-NEXT:    s_wait_samplecnt 0x0
262; GFX12-NEXT:    ; return to shader part epilog
263main_body:
264  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
265  %res.vec = extractvalue {<4 x float>,i32} %v, 0
266  %res.f = extractelement <4 x float> %res.vec, i32 1
267  %res.err = extractvalue {<4 x float>,i32} %v, 1
268  %res.errf = bitcast i32 %res.err to float
269  %res.tmp = insertelement <2 x float> poison, float %res.f, i32 0
270  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
271  ret <2 x float> %res
272}
273
; TFE sample where only element 2 of the sampled vector and the i32 struct
; member are used. The requested dmask of 15 is expected to shrink to 0x4 and
; the result to two dwords, v[0:1], both zeroed before the sample.
; The returned vector starts from poison (not the deprecated undef); both
; lanes are overwritten before the ret.
274define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
275; VERDE-LABEL: sample_1d_tfe_adjust_writemask_3:
276; VERDE:       ; %bb.0: ; %main_body
277; VERDE-NEXT:    s_mov_b64 s[12:13], exec
278; VERDE-NEXT:    s_wqm_b64 exec, exec
279; VERDE-NEXT:    v_mov_b32_e32 v2, v0
280; VERDE-NEXT:    v_mov_b32_e32 v0, 0
281; VERDE-NEXT:    v_mov_b32_e32 v1, v0
282; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
283; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
284; VERDE-NEXT:    s_waitcnt vmcnt(0)
285; VERDE-NEXT:    ; return to shader part epilog
286;
287; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_3:
288; GFX6789:       ; %bb.0: ; %main_body
289; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
290; GFX6789-NEXT:    s_wqm_b64 exec, exec
291; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
292; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
293; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
294; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
295; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe
296; GFX6789-NEXT:    s_waitcnt vmcnt(0)
297; GFX6789-NEXT:    ; return to shader part epilog
298;
299; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_3:
300; GFX10PLUS:       ; %bb.0: ; %main_body
301; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
302; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
303; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
304; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
305; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
306; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
307; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe
308; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
309; GFX10PLUS-NEXT:    ; return to shader part epilog
310;
311; GFX12-LABEL: sample_1d_tfe_adjust_writemask_3:
312; GFX12:       ; %bb.0: ; %main_body
313; GFX12-NEXT:    s_mov_b32 s12, exec_lo
314; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
315; GFX12-NEXT:    v_mov_b32_e32 v2, v0
316; GFX12-NEXT:    v_mov_b32_e32 v0, 0
317; GFX12-NEXT:    v_mov_b32_e32 v1, v0
318; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
319; GFX12-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe
320; GFX12-NEXT:    s_wait_samplecnt 0x0
321; GFX12-NEXT:    ; return to shader part epilog
322main_body:
323  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
324  %res.vec = extractvalue {<4 x float>,i32} %v, 0
325  %res.f = extractelement <4 x float> %res.vec, i32 2
326  %res.err = extractvalue {<4 x float>,i32} %v, 1
327  %res.errf = bitcast i32 %res.err to float
328  %res.tmp = insertelement <2 x float> poison, float %res.f, i32 0
329  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
330  ret <2 x float> %res
331}
332
; TFE sample where only element 3 of the sampled vector and the i32 struct
; member are used. The requested dmask of 15 is expected to shrink to 0x8 and
; the result to two dwords, v[0:1], both zeroed before the sample.
; The returned vector starts from poison (not the deprecated undef); both
; lanes are overwritten before the ret.
333define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
334; VERDE-LABEL: sample_1d_tfe_adjust_writemask_4:
335; VERDE:       ; %bb.0: ; %main_body
336; VERDE-NEXT:    s_mov_b64 s[12:13], exec
337; VERDE-NEXT:    s_wqm_b64 exec, exec
338; VERDE-NEXT:    v_mov_b32_e32 v2, v0
339; VERDE-NEXT:    v_mov_b32_e32 v0, 0
340; VERDE-NEXT:    v_mov_b32_e32 v1, v0
341; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
342; VERDE-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
343; VERDE-NEXT:    s_waitcnt vmcnt(0)
344; VERDE-NEXT:    ; return to shader part epilog
345;
346; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_4:
347; GFX6789:       ; %bb.0: ; %main_body
348; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
349; GFX6789-NEXT:    s_wqm_b64 exec, exec
350; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
351; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
352; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
353; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
354; GFX6789-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe
355; GFX6789-NEXT:    s_waitcnt vmcnt(0)
356; GFX6789-NEXT:    ; return to shader part epilog
357;
358; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_4:
359; GFX10PLUS:       ; %bb.0: ; %main_body
360; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
361; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
362; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
363; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
364; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
365; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
366; GFX10PLUS-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe
367; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
368; GFX10PLUS-NEXT:    ; return to shader part epilog
369;
370; GFX12-LABEL: sample_1d_tfe_adjust_writemask_4:
371; GFX12:       ; %bb.0: ; %main_body
372; GFX12-NEXT:    s_mov_b32 s12, exec_lo
373; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
374; GFX12-NEXT:    v_mov_b32_e32 v2, v0
375; GFX12-NEXT:    v_mov_b32_e32 v0, 0
376; GFX12-NEXT:    v_mov_b32_e32 v1, v0
377; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
378; GFX12-NEXT:    image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe
379; GFX12-NEXT:    s_wait_samplecnt 0x0
380; GFX12-NEXT:    ; return to shader part epilog
381main_body:
382  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
383  %res.vec = extractvalue {<4 x float>,i32} %v, 0
384  %res.f = extractelement <4 x float> %res.vec, i32 3
385  %res.err = extractvalue {<4 x float>,i32} %v, 1
386  %res.errf = bitcast i32 %res.err to float
387  %res.tmp = insertelement <2 x float> poison, float %res.f, i32 0
388  %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1
389  ret <2 x float> %res
390}
391
; TFE sample where elements 0 and 1 of the sampled vector and the i32 struct
; member are used. The requested dmask of 15 is expected to shrink to 0x3 and
; the result to three dwords, v[0:2], all zeroed before the sample.
; The returned vector starts from poison (not the deprecated undef); lanes
; 0-2 are overwritten and lane 3 is never written by this function.
392define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
393; VERDE-LABEL: sample_1d_tfe_adjust_writemask_12:
394; VERDE:       ; %bb.0: ; %main_body
395; VERDE-NEXT:    s_mov_b64 s[12:13], exec
396; VERDE-NEXT:    s_wqm_b64 exec, exec
397; VERDE-NEXT:    v_mov_b32_e32 v3, v0
398; VERDE-NEXT:    v_mov_b32_e32 v0, 0
399; VERDE-NEXT:    v_mov_b32_e32 v1, v0
400; VERDE-NEXT:    v_mov_b32_e32 v2, v0
401; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
402; VERDE-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
403; VERDE-NEXT:    s_waitcnt vmcnt(0)
404; VERDE-NEXT:    ; return to shader part epilog
405;
406; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_12:
407; GFX6789:       ; %bb.0: ; %main_body
408; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
409; GFX6789-NEXT:    s_wqm_b64 exec, exec
410; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
411; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
412; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
413; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
414; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
415; GFX6789-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe
416; GFX6789-NEXT:    s_waitcnt vmcnt(0)
417; GFX6789-NEXT:    ; return to shader part epilog
418;
419; GFX10-LABEL: sample_1d_tfe_adjust_writemask_12:
420; GFX10:       ; %bb.0: ; %main_body
421; GFX10-NEXT:    s_mov_b32 s12, exec_lo
422; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
423; GFX10-NEXT:    v_mov_b32_e32 v3, v0
424; GFX10-NEXT:    v_mov_b32_e32 v0, 0
425; GFX10-NEXT:    v_mov_b32_e32 v1, v0
426; GFX10-NEXT:    v_mov_b32_e32 v2, v0
427; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
428; GFX10-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
429; GFX10-NEXT:    s_waitcnt vmcnt(0)
430; GFX10-NEXT:    ; return to shader part epilog
431;
432; GFX11-LABEL: sample_1d_tfe_adjust_writemask_12:
433; GFX11:       ; %bb.0: ; %main_body
434; GFX11-NEXT:    s_mov_b32 s12, exec_lo
435; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
436; GFX11-NEXT:    v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
437; GFX11-NEXT:    v_mov_b32_e32 v1, v0
438; GFX11-NEXT:    v_mov_b32_e32 v2, v0
439; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s12
440; GFX11-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
441; GFX11-NEXT:    s_waitcnt vmcnt(0)
442; GFX11-NEXT:    ; return to shader part epilog
443;
444; GFX12-LABEL: sample_1d_tfe_adjust_writemask_12:
445; GFX12:       ; %bb.0: ; %main_body
446; GFX12-NEXT:    s_mov_b32 s12, exec_lo
447; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
448; GFX12-NEXT:    v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
449; GFX12-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, v0
450; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
451; GFX12-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe
452; GFX12-NEXT:    s_wait_samplecnt 0x0
453; GFX12-NEXT:    ; return to shader part epilog
454main_body:
455  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
456  %res.vec = extractvalue {<4 x float>,i32} %v, 0
457  %res.f1 = extractelement <4 x float> %res.vec, i32 0
458  %res.f2 = extractelement <4 x float> %res.vec, i32 1
459  %res.err = extractvalue {<4 x float>,i32} %v, 1
460  %res.errf = bitcast i32 %res.err to float
461  %res.tmp1 = insertelement <4 x float> poison, float %res.f1, i32 0
462  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
463  %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
464  ret <4 x float> %res
465}
466
; TFE sample where elements 1 and 3 of the sampled vector and the i32 struct
; member are used. The requested dmask of 15 is expected to shrink to 0xa and
; the result to three dwords, v[0:2], all zeroed before the sample.
; The returned vector starts from poison (not the deprecated undef); lanes
; 0-2 are overwritten and lane 3 is never written by this function.
467define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
468; VERDE-LABEL: sample_1d_tfe_adjust_writemask_24:
469; VERDE:       ; %bb.0: ; %main_body
470; VERDE-NEXT:    s_mov_b64 s[12:13], exec
471; VERDE-NEXT:    s_wqm_b64 exec, exec
472; VERDE-NEXT:    v_mov_b32_e32 v3, v0
473; VERDE-NEXT:    v_mov_b32_e32 v0, 0
474; VERDE-NEXT:    v_mov_b32_e32 v1, v0
475; VERDE-NEXT:    v_mov_b32_e32 v2, v0
476; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
477; VERDE-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
478; VERDE-NEXT:    s_waitcnt vmcnt(0)
479; VERDE-NEXT:    ; return to shader part epilog
480;
481; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_24:
482; GFX6789:       ; %bb.0: ; %main_body
483; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
484; GFX6789-NEXT:    s_wqm_b64 exec, exec
485; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
486; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
487; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
488; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
489; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
490; GFX6789-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe
491; GFX6789-NEXT:    s_waitcnt vmcnt(0)
492; GFX6789-NEXT:    ; return to shader part epilog
493;
494; GFX10-LABEL: sample_1d_tfe_adjust_writemask_24:
495; GFX10:       ; %bb.0: ; %main_body
496; GFX10-NEXT:    s_mov_b32 s12, exec_lo
497; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
498; GFX10-NEXT:    v_mov_b32_e32 v3, v0
499; GFX10-NEXT:    v_mov_b32_e32 v0, 0
500; GFX10-NEXT:    v_mov_b32_e32 v1, v0
501; GFX10-NEXT:    v_mov_b32_e32 v2, v0
502; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s12
503; GFX10-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
504; GFX10-NEXT:    s_waitcnt vmcnt(0)
505; GFX10-NEXT:    ; return to shader part epilog
506;
507; GFX11-LABEL: sample_1d_tfe_adjust_writemask_24:
508; GFX11:       ; %bb.0: ; %main_body
509; GFX11-NEXT:    s_mov_b32 s12, exec_lo
510; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
511; GFX11-NEXT:    v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
512; GFX11-NEXT:    v_mov_b32_e32 v1, v0
513; GFX11-NEXT:    v_mov_b32_e32 v2, v0
514; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s12
515; GFX11-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
516; GFX11-NEXT:    s_waitcnt vmcnt(0)
517; GFX11-NEXT:    ; return to shader part epilog
518;
519; GFX12-LABEL: sample_1d_tfe_adjust_writemask_24:
520; GFX12:       ; %bb.0: ; %main_body
521; GFX12-NEXT:    s_mov_b32 s12, exec_lo
522; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
523; GFX12-NEXT:    v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0
524; GFX12-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, v0
525; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
526; GFX12-NEXT:    image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe
527; GFX12-NEXT:    s_wait_samplecnt 0x0
528; GFX12-NEXT:    ; return to shader part epilog
529main_body:
530  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
531  %res.vec = extractvalue {<4 x float>,i32} %v, 0
532  %res.f1 = extractelement <4 x float> %res.vec, i32 1
533  %res.f2 = extractelement <4 x float> %res.vec, i32 3
534  %res.err = extractvalue {<4 x float>,i32} %v, 1
535  %res.errf = bitcast i32 %res.err to float
536  %res.tmp1 = insertelement <4 x float> poison, float %res.f1, i32 0
537  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
538  %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2
539  ret <4 x float> %res
540}
541
; TFE sample where elements 0, 2 and 3 of the sampled vector and the i32
; struct member are used. The requested dmask of 15 is expected to shrink to
; 0xd and the result to four dwords, v[0:3], all zeroed before the sample.
; The returned vector starts from poison (not the deprecated undef); all four
; lanes are overwritten before the ret.
542define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
543; VERDE-LABEL: sample_1d_tfe_adjust_writemask_134:
544; VERDE:       ; %bb.0: ; %main_body
545; VERDE-NEXT:    s_mov_b64 s[12:13], exec
546; VERDE-NEXT:    s_wqm_b64 exec, exec
547; VERDE-NEXT:    v_mov_b32_e32 v4, v0
548; VERDE-NEXT:    v_mov_b32_e32 v0, 0
549; VERDE-NEXT:    v_mov_b32_e32 v1, v0
550; VERDE-NEXT:    v_mov_b32_e32 v2, v0
551; VERDE-NEXT:    v_mov_b32_e32 v3, v0
552; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
553; VERDE-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
554; VERDE-NEXT:    s_waitcnt vmcnt(0)
555; VERDE-NEXT:    ; return to shader part epilog
556;
557; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_134:
558; GFX6789:       ; %bb.0: ; %main_body
559; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
560; GFX6789-NEXT:    s_wqm_b64 exec, exec
561; GFX6789-NEXT:    v_mov_b32_e32 v4, v0
562; GFX6789-NEXT:    v_mov_b32_e32 v0, 0
563; GFX6789-NEXT:    v_mov_b32_e32 v1, v0
564; GFX6789-NEXT:    v_mov_b32_e32 v2, v0
565; GFX6789-NEXT:    v_mov_b32_e32 v3, v0
566; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
567; GFX6789-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe
568; GFX6789-NEXT:    s_waitcnt vmcnt(0)
569; GFX6789-NEXT:    ; return to shader part epilog
570;
571; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_134:
572; GFX10PLUS:       ; %bb.0: ; %main_body
573; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
574; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
575; GFX10PLUS-NEXT:    v_mov_b32_e32 v4, v0
576; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, 0
577; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
578; GFX10PLUS-NEXT:    v_mov_b32_e32 v2, v0
579; GFX10PLUS-NEXT:    v_mov_b32_e32 v3, v0
580; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
581; GFX10PLUS-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe
582; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
583; GFX10PLUS-NEXT:    ; return to shader part epilog
584;
585; GFX12-LABEL: sample_1d_tfe_adjust_writemask_134:
586; GFX12:       ; %bb.0: ; %main_body
587; GFX12-NEXT:    s_mov_b32 s12, exec_lo
588; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
589; GFX12-NEXT:    v_mov_b32_e32 v4, v0
590; GFX12-NEXT:    v_mov_b32_e32 v0, 0
591; GFX12-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, v0
592; GFX12-NEXT:    v_mov_b32_e32 v3, v0
593; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
594; GFX12-NEXT:    image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe
595; GFX12-NEXT:    s_wait_samplecnt 0x0
596; GFX12-NEXT:    ; return to shader part epilog
597main_body:
598  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
599  %res.vec = extractvalue {<4 x float>,i32} %v, 0
600  %res.f1 = extractelement <4 x float> %res.vec, i32 0
601  %res.f2 = extractelement <4 x float> %res.vec, i32 2
602  %res.f3 = extractelement <4 x float> %res.vec, i32 3
603  %res.err = extractvalue {<4 x float>,i32} %v, 1
604  %res.errf = bitcast i32 %res.err to float
605  %res.tmp1 = insertelement <4 x float> poison, float %res.f1, i32 0
606  %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1
607  %res.tmp3 = insertelement <4 x float> %res.tmp2, float %res.f3, i32 2
608  %res = insertelement <4 x float> %res.tmp3, float %res.errf, i32 3
609  ret <4 x float> %res
610}
611
; Samples a 1D image through the struct-returning form of the intrinsic, which
; yields the <4 x float> texel together with an extra i32. The i32 is stored to
; %out and the texel is returned. Every expected sequence zeroes v0 through v4,
; samples into the five-register range starting at v0 with the lwe modifier,
; and then stores v4.
; Review note - the i32 2 operand is presumably the texfailctrl value that
; requests LWE, which would explain the lwe modifier; confirm against the
; intrinsic documentation.
612define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
613; VERDE-LABEL: sample_1d_lwe:
614; VERDE:       ; %bb.0: ; %main_body
615; VERDE-NEXT:    s_mov_b64 s[14:15], exec
616; VERDE-NEXT:    s_wqm_b64 exec, exec
617; VERDE-NEXT:    v_mov_b32_e32 v5, v0
618; VERDE-NEXT:    v_mov_b32_e32 v0, 0
619; VERDE-NEXT:    v_mov_b32_e32 v1, v0
620; VERDE-NEXT:    v_mov_b32_e32 v2, v0
621; VERDE-NEXT:    v_mov_b32_e32 v3, v0
622; VERDE-NEXT:    v_mov_b32_e32 v4, v0
623; VERDE-NEXT:    s_and_b64 exec, exec, s[14:15]
624; VERDE-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
625; VERDE-NEXT:    s_mov_b32 s15, 0xf000
626; VERDE-NEXT:    s_mov_b32 s14, -1
627; VERDE-NEXT:    s_waitcnt vmcnt(0)
628; VERDE-NEXT:    buffer_store_dword v4, off, s[12:15], 0
629; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
630; VERDE-NEXT:    ; return to shader part epilog
631;
632; GFX6789-LABEL: sample_1d_lwe:
633; GFX6789:       ; %bb.0: ; %main_body
634; GFX6789-NEXT:    s_mov_b64 s[14:15], exec
635; GFX6789-NEXT:    s_wqm_b64 exec, exec
636; GFX6789-NEXT:    v_mov_b32_e32 v6, 0
637; GFX6789-NEXT:    v_mov_b32_e32 v5, v0
638; GFX6789-NEXT:    v_mov_b32_e32 v7, v6
639; GFX6789-NEXT:    v_mov_b32_e32 v8, v6
640; GFX6789-NEXT:    v_mov_b32_e32 v9, v6
641; GFX6789-NEXT:    v_mov_b32_e32 v10, v6
642; GFX6789-NEXT:    v_mov_b32_e32 v0, v6
643; GFX6789-NEXT:    v_mov_b32_e32 v1, v7
644; GFX6789-NEXT:    v_mov_b32_e32 v2, v8
645; GFX6789-NEXT:    v_mov_b32_e32 v3, v9
646; GFX6789-NEXT:    v_mov_b32_e32 v4, v10
647; GFX6789-NEXT:    s_and_b64 exec, exec, s[14:15]
648; GFX6789-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe
649; GFX6789-NEXT:    s_waitcnt vmcnt(0)
650; GFX6789-NEXT:    global_store_dword v6, v4, s[12:13]
651; GFX6789-NEXT:    s_waitcnt vmcnt(0)
652; GFX6789-NEXT:    ; return to shader part epilog
653;
654; GFX10-LABEL: sample_1d_lwe:
655; GFX10:       ; %bb.0: ; %main_body
656; GFX10-NEXT:    s_mov_b32 s14, exec_lo
657; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
658; GFX10-NEXT:    v_mov_b32_e32 v6, 0
659; GFX10-NEXT:    v_mov_b32_e32 v5, v0
660; GFX10-NEXT:    v_mov_b32_e32 v7, v6
661; GFX10-NEXT:    v_mov_b32_e32 v8, v6
662; GFX10-NEXT:    v_mov_b32_e32 v9, v6
663; GFX10-NEXT:    v_mov_b32_e32 v10, v6
664; GFX10-NEXT:    v_mov_b32_e32 v0, v6
665; GFX10-NEXT:    v_mov_b32_e32 v1, v7
666; GFX10-NEXT:    v_mov_b32_e32 v2, v8
667; GFX10-NEXT:    v_mov_b32_e32 v3, v9
668; GFX10-NEXT:    v_mov_b32_e32 v4, v10
669; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s14
670; GFX10-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
671; GFX10-NEXT:    s_waitcnt vmcnt(0)
672; GFX10-NEXT:    global_store_dword v6, v4, s[12:13]
673; GFX10-NEXT:    ; return to shader part epilog
674;
675; GFX11-LABEL: sample_1d_lwe:
676; GFX11:       ; %bb.0: ; %main_body
677; GFX11-NEXT:    s_mov_b32 s14, exec_lo
678; GFX11-NEXT:    s_wqm_b32 exec_lo, exec_lo
679; GFX11-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
680; GFX11-NEXT:    v_mov_b32_e32 v7, v6
681; GFX11-NEXT:    v_mov_b32_e32 v8, v6
682; GFX11-NEXT:    v_mov_b32_e32 v9, v6
683; GFX11-NEXT:    v_mov_b32_e32 v10, v6
684; GFX11-NEXT:    v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
685; GFX11-NEXT:    v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9
686; GFX11-NEXT:    v_mov_b32_e32 v4, v10
687; GFX11-NEXT:    s_and_b32 exec_lo, exec_lo, s14
688; GFX11-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
689; GFX11-NEXT:    s_waitcnt vmcnt(0)
690; GFX11-NEXT:    global_store_b32 v6, v4, s[12:13]
691; GFX11-NEXT:    ; return to shader part epilog
692;
693; GFX12-LABEL: sample_1d_lwe:
694; GFX12:       ; %bb.0: ; %main_body
695; GFX12-NEXT:    s_mov_b32 s14, exec_lo
696; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
697; GFX12-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0
698; GFX12-NEXT:    v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6
699; GFX12-NEXT:    v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6
700; GFX12-NEXT:    v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
701; GFX12-NEXT:    v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9
702; GFX12-NEXT:    v_mov_b32_e32 v4, v10
703; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s14
704; GFX12-NEXT:    image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe
705; GFX12-NEXT:    s_wait_samplecnt 0x0
706; GFX12-NEXT:    global_store_b32 v6, v4, s[12:13]
707; GFX12-NEXT:    ; return to shader part epilog
708main_body:
709  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
710  %v.vec = extractvalue {<4 x float>, i32} %v, 0
711  %v.err = extractvalue {<4 x float>, i32} %v, 1
712  store i32 %v.err, ptr addrspace(1) %out, align 4
713  ret <4 x float> %v.vec
714}
715
; Plain 2D sample with coordinates %s and %t, returning all four channels
; (i32 15 corresponds to dmask 0xf in the expected output). The checks expect
; image_sample with a two-register address, written v[0..1] as a range before
; gfx12 and as the list [v0, v1] on gfx12, plus the SQ_RSRC_IMG_2D dim modifier
; on gfx10 and later.
716define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
717; VERDE-LABEL: sample_2d:
718; VERDE:       ; %bb.0: ; %main_body
719; VERDE-NEXT:    s_mov_b64 s[12:13], exec
720; VERDE-NEXT:    s_wqm_b64 exec, exec
721; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
722; VERDE-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
723; VERDE-NEXT:    s_waitcnt vmcnt(0)
724; VERDE-NEXT:    ; return to shader part epilog
725;
726; GFX6789-LABEL: sample_2d:
727; GFX6789:       ; %bb.0: ; %main_body
728; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
729; GFX6789-NEXT:    s_wqm_b64 exec, exec
730; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
731; GFX6789-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
732; GFX6789-NEXT:    s_waitcnt vmcnt(0)
733; GFX6789-NEXT:    ; return to shader part epilog
734;
735; GFX10PLUS-LABEL: sample_2d:
736; GFX10PLUS:       ; %bb.0: ; %main_body
737; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
738; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
739; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
740; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
741; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
742; GFX10PLUS-NEXT:    ; return to shader part epilog
743;
744; GFX12-LABEL: sample_2d:
745; GFX12:       ; %bb.0: ; %main_body
746; GFX12-NEXT:    s_mov_b32 s12, exec_lo
747; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
748; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
749; GFX12-NEXT:    image_sample v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
750; GFX12-NEXT:    s_wait_samplecnt 0x0
751; GFX12-NEXT:    ; return to shader part epilog
752main_body:
753  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
754  ret <4 x float> %v
755}
756
; Plain 3D sample with coordinates %s, %t and %r. The checks expect
; image_sample with a three-register address (v0 through v2) and, on gfx10 and
; later, the SQ_RSRC_IMG_3D dim modifier.
757define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
758; VERDE-LABEL: sample_3d:
759; VERDE:       ; %bb.0: ; %main_body
760; VERDE-NEXT:    s_mov_b64 s[12:13], exec
761; VERDE-NEXT:    s_wqm_b64 exec, exec
762; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
763; VERDE-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
764; VERDE-NEXT:    s_waitcnt vmcnt(0)
765; VERDE-NEXT:    ; return to shader part epilog
766;
767; GFX6789-LABEL: sample_3d:
768; GFX6789:       ; %bb.0: ; %main_body
769; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
770; GFX6789-NEXT:    s_wqm_b64 exec, exec
771; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
772; GFX6789-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
773; GFX6789-NEXT:    s_waitcnt vmcnt(0)
774; GFX6789-NEXT:    ; return to shader part epilog
775;
776; GFX10PLUS-LABEL: sample_3d:
777; GFX10PLUS:       ; %bb.0: ; %main_body
778; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
779; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
780; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
781; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
782; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
783; GFX10PLUS-NEXT:    ; return to shader part epilog
784;
785; GFX12-LABEL: sample_3d:
786; GFX12:       ; %bb.0: ; %main_body
787; GFX12-NEXT:    s_mov_b32 s12, exec_lo
788; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
789; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
790; GFX12-NEXT:    image_sample v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
791; GFX12-NEXT:    s_wait_samplecnt 0x0
792; GFX12-NEXT:    ; return to shader part epilog
793main_body:
794  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
795  ret <4 x float> %v
796}
797
; Cube-map sample with coordinates %s, %t and a %face operand. The verde and
; gfx9 checks expect the da modifier on image_sample, while gfx10 and later
; expect the SQ_RSRC_IMG_CUBE dim modifier instead. The address occupies three
; registers (v0 through v2) on every target.
798define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
799; VERDE-LABEL: sample_cube:
800; VERDE:       ; %bb.0: ; %main_body
801; VERDE-NEXT:    s_mov_b64 s[12:13], exec
802; VERDE-NEXT:    s_wqm_b64 exec, exec
803; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
804; VERDE-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
805; VERDE-NEXT:    s_waitcnt vmcnt(0)
806; VERDE-NEXT:    ; return to shader part epilog
807;
808; GFX6789-LABEL: sample_cube:
809; GFX6789:       ; %bb.0: ; %main_body
810; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
811; GFX6789-NEXT:    s_wqm_b64 exec, exec
812; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
813; GFX6789-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
814; GFX6789-NEXT:    s_waitcnt vmcnt(0)
815; GFX6789-NEXT:    ; return to shader part epilog
816;
817; GFX10PLUS-LABEL: sample_cube:
818; GFX10PLUS:       ; %bb.0: ; %main_body
819; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
820; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
821; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
822; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
823; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
824; GFX10PLUS-NEXT:    ; return to shader part epilog
825;
826; GFX12-LABEL: sample_cube:
827; GFX12:       ; %bb.0: ; %main_body
828; GFX12-NEXT:    s_mov_b32 s12, exec_lo
829; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
830; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
831; GFX12-NEXT:    image_sample v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE
832; GFX12-NEXT:    s_wait_samplecnt 0x0
833; GFX12-NEXT:    ; return to shader part epilog
834main_body:
835  %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
836  ret <4 x float> %v
837}
838
; 1D-array sample with coordinate %s and a %slice operand. The verde and gfx9
; checks expect the da modifier, while gfx10 and later expect the
; SQ_RSRC_IMG_1D_ARRAY dim modifier. The address occupies two registers
; (v0 and v1).
839define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
840; VERDE-LABEL: sample_1darray:
841; VERDE:       ; %bb.0: ; %main_body
842; VERDE-NEXT:    s_mov_b64 s[12:13], exec
843; VERDE-NEXT:    s_wqm_b64 exec, exec
844; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
845; VERDE-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
846; VERDE-NEXT:    s_waitcnt vmcnt(0)
847; VERDE-NEXT:    ; return to shader part epilog
848;
849; GFX6789-LABEL: sample_1darray:
850; GFX6789:       ; %bb.0: ; %main_body
851; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
852; GFX6789-NEXT:    s_wqm_b64 exec, exec
853; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
854; GFX6789-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da
855; GFX6789-NEXT:    s_waitcnt vmcnt(0)
856; GFX6789-NEXT:    ; return to shader part epilog
857;
858; GFX10PLUS-LABEL: sample_1darray:
859; GFX10PLUS:       ; %bb.0: ; %main_body
860; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
861; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
862; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
863; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
864; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
865; GFX10PLUS-NEXT:    ; return to shader part epilog
866;
867; GFX12-LABEL: sample_1darray:
868; GFX12:       ; %bb.0: ; %main_body
869; GFX12-NEXT:    s_mov_b32 s12, exec_lo
870; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
871; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
872; GFX12-NEXT:    image_sample v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY
873; GFX12-NEXT:    s_wait_samplecnt 0x0
874; GFX12-NEXT:    ; return to shader part epilog
875main_body:
876  %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
877  ret <4 x float> %v
878}
879
; 2D-array sample with coordinates %s, %t and a %slice operand. The verde and
; gfx9 checks expect the da modifier, while gfx10 and later expect the
; SQ_RSRC_IMG_2D_ARRAY dim modifier. The address occupies three registers
; (v0 through v2).
880define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
881; VERDE-LABEL: sample_2darray:
882; VERDE:       ; %bb.0: ; %main_body
883; VERDE-NEXT:    s_mov_b64 s[12:13], exec
884; VERDE-NEXT:    s_wqm_b64 exec, exec
885; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
886; VERDE-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
887; VERDE-NEXT:    s_waitcnt vmcnt(0)
888; VERDE-NEXT:    ; return to shader part epilog
889;
890; GFX6789-LABEL: sample_2darray:
891; GFX6789:       ; %bb.0: ; %main_body
892; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
893; GFX6789-NEXT:    s_wqm_b64 exec, exec
894; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
895; GFX6789-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da
896; GFX6789-NEXT:    s_waitcnt vmcnt(0)
897; GFX6789-NEXT:    ; return to shader part epilog
898;
899; GFX10PLUS-LABEL: sample_2darray:
900; GFX10PLUS:       ; %bb.0: ; %main_body
901; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
902; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
903; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
904; GFX10PLUS-NEXT:    image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
905; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
906; GFX10PLUS-NEXT:    ; return to shader part epilog
907;
908; GFX12-LABEL: sample_2darray:
909; GFX12:       ; %bb.0: ; %main_body
910; GFX12-NEXT:    s_mov_b32 s12, exec_lo
911; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
912; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
913; GFX12-NEXT:    image_sample v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY
914; GFX12-NEXT:    s_wait_samplecnt 0x0
915; GFX12-NEXT:    ; return to shader part epilog
916main_body:
917  %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
918  ret <4 x float> %v
919}
920
; 1D sample through the .c variant of the intrinsic, which takes %zcompare
; ahead of the coordinate %s. The checks expect image_sample_c with a
; two-register address (v0 and v1) and, on gfx10 and later, the
; SQ_RSRC_IMG_1D dim modifier.
921define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
922; VERDE-LABEL: sample_c_1d:
923; VERDE:       ; %bb.0: ; %main_body
924; VERDE-NEXT:    s_mov_b64 s[12:13], exec
925; VERDE-NEXT:    s_wqm_b64 exec, exec
926; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
927; VERDE-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
928; VERDE-NEXT:    s_waitcnt vmcnt(0)
929; VERDE-NEXT:    ; return to shader part epilog
930;
931; GFX6789-LABEL: sample_c_1d:
932; GFX6789:       ; %bb.0: ; %main_body
933; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
934; GFX6789-NEXT:    s_wqm_b64 exec, exec
935; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
936; GFX6789-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
937; GFX6789-NEXT:    s_waitcnt vmcnt(0)
938; GFX6789-NEXT:    ; return to shader part epilog
939;
940; GFX10PLUS-LABEL: sample_c_1d:
941; GFX10PLUS:       ; %bb.0: ; %main_body
942; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
943; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
944; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
945; GFX10PLUS-NEXT:    image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
946; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
947; GFX10PLUS-NEXT:    ; return to shader part epilog
948;
949; GFX12-LABEL: sample_c_1d:
950; GFX12:       ; %bb.0: ; %main_body
951; GFX12-NEXT:    s_mov_b32 s12, exec_lo
952; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
953; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
954; GFX12-NEXT:    image_sample_c v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
955; GFX12-NEXT:    s_wait_samplecnt 0x0
956; GFX12-NEXT:    ; return to shader part epilog
957main_body:
958  %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
959  ret <4 x float> %v
960}
961
; 2D sample through the .c variant of the intrinsic, which takes %zcompare
; ahead of the coordinates %s and %t. The checks expect image_sample_c with a
; three-register address (v0 through v2) and, on gfx10 and later, the
; SQ_RSRC_IMG_2D dim modifier.
962define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
963; VERDE-LABEL: sample_c_2d:
964; VERDE:       ; %bb.0: ; %main_body
965; VERDE-NEXT:    s_mov_b64 s[12:13], exec
966; VERDE-NEXT:    s_wqm_b64 exec, exec
967; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
968; VERDE-NEXT:    image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
969; VERDE-NEXT:    s_waitcnt vmcnt(0)
970; VERDE-NEXT:    ; return to shader part epilog
971;
972; GFX6789-LABEL: sample_c_2d:
973; GFX6789:       ; %bb.0: ; %main_body
974; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
975; GFX6789-NEXT:    s_wqm_b64 exec, exec
976; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
977; GFX6789-NEXT:    image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
978; GFX6789-NEXT:    s_waitcnt vmcnt(0)
979; GFX6789-NEXT:    ; return to shader part epilog
980;
981; GFX10PLUS-LABEL: sample_c_2d:
982; GFX10PLUS:       ; %bb.0: ; %main_body
983; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
984; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
985; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
986; GFX10PLUS-NEXT:    image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
987; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
988; GFX10PLUS-NEXT:    ; return to shader part epilog
989;
990; GFX12-LABEL: sample_c_2d:
991; GFX12:       ; %bb.0: ; %main_body
992; GFX12-NEXT:    s_mov_b32 s12, exec_lo
993; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
994; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
995; GFX12-NEXT:    image_sample_c v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
996; GFX12-NEXT:    s_wait_samplecnt 0x0
997; GFX12-NEXT:    ; return to shader part epilog
998main_body:
999  %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1000  ret <4 x float> %v
1001}
1002
; 1D sample through the .cl variant of the intrinsic, which takes %clamp after
; the coordinate %s. The checks expect image_sample_cl with a two-register
; address (v0 and v1) and, on gfx10 and later, the SQ_RSRC_IMG_1D dim modifier.
1003define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) {
1004; VERDE-LABEL: sample_cl_1d:
1005; VERDE:       ; %bb.0: ; %main_body
1006; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1007; VERDE-NEXT:    s_wqm_b64 exec, exec
1008; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1009; VERDE-NEXT:    image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1010; VERDE-NEXT:    s_waitcnt vmcnt(0)
1011; VERDE-NEXT:    ; return to shader part epilog
1012;
1013; GFX6789-LABEL: sample_cl_1d:
1014; GFX6789:       ; %bb.0: ; %main_body
1015; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1016; GFX6789-NEXT:    s_wqm_b64 exec, exec
1017; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1018; GFX6789-NEXT:    image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1019; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1020; GFX6789-NEXT:    ; return to shader part epilog
1021;
1022; GFX10PLUS-LABEL: sample_cl_1d:
1023; GFX10PLUS:       ; %bb.0: ; %main_body
1024; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1025; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1026; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1027; GFX10PLUS-NEXT:    image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1028; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1029; GFX10PLUS-NEXT:    ; return to shader part epilog
1030;
1031; GFX12-LABEL: sample_cl_1d:
1032; GFX12:       ; %bb.0: ; %main_body
1033; GFX12-NEXT:    s_mov_b32 s12, exec_lo
1034; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
1035; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1036; GFX12-NEXT:    image_sample_cl v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1037; GFX12-NEXT:    s_wait_samplecnt 0x0
1038; GFX12-NEXT:    ; return to shader part epilog
1039main_body:
1040  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1041  ret <4 x float> %v
1042}
1043
; 2D sample through the .cl variant of the intrinsic, which takes %clamp after
; the coordinates %s and %t. The checks expect image_sample_cl with a
; three-register address (v0 through v2) and, on gfx10 and later, the
; SQ_RSRC_IMG_2D dim modifier.
1044define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
1045; VERDE-LABEL: sample_cl_2d:
1046; VERDE:       ; %bb.0: ; %main_body
1047; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1048; VERDE-NEXT:    s_wqm_b64 exec, exec
1049; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1050; VERDE-NEXT:    image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1051; VERDE-NEXT:    s_waitcnt vmcnt(0)
1052; VERDE-NEXT:    ; return to shader part epilog
1053;
1054; GFX6789-LABEL: sample_cl_2d:
1055; GFX6789:       ; %bb.0: ; %main_body
1056; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1057; GFX6789-NEXT:    s_wqm_b64 exec, exec
1058; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1059; GFX6789-NEXT:    image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1060; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1061; GFX6789-NEXT:    ; return to shader part epilog
1062;
1063; GFX10PLUS-LABEL: sample_cl_2d:
1064; GFX10PLUS:       ; %bb.0: ; %main_body
1065; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1066; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1067; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1068; GFX10PLUS-NEXT:    image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1069; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1070; GFX10PLUS-NEXT:    ; return to shader part epilog
1071;
1072; GFX12-LABEL: sample_cl_2d:
1073; GFX12:       ; %bb.0: ; %main_body
1074; GFX12-NEXT:    s_mov_b32 s12, exec_lo
1075; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
1076; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1077; GFX12-NEXT:    image_sample_cl v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1078; GFX12-NEXT:    s_wait_samplecnt 0x0
1079; GFX12-NEXT:    ; return to shader part epilog
1080main_body:
1081  %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1082  ret <4 x float> %v
1083}
1084
; 1D sample through the .c.cl variant of the intrinsic, with operands in the
; order %zcompare, %s, %clamp. The checks expect image_sample_c_cl with a
; three-register address (v0 through v2) and, on gfx10 and later, the
; SQ_RSRC_IMG_1D dim modifier.
1085define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) {
1086; VERDE-LABEL: sample_c_cl_1d:
1087; VERDE:       ; %bb.0: ; %main_body
1088; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1089; VERDE-NEXT:    s_wqm_b64 exec, exec
1090; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1091; VERDE-NEXT:    image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1092; VERDE-NEXT:    s_waitcnt vmcnt(0)
1093; VERDE-NEXT:    ; return to shader part epilog
1094;
1095; GFX6789-LABEL: sample_c_cl_1d:
1096; GFX6789:       ; %bb.0: ; %main_body
1097; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1098; GFX6789-NEXT:    s_wqm_b64 exec, exec
1099; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1100; GFX6789-NEXT:    image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1101; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1102; GFX6789-NEXT:    ; return to shader part epilog
1103;
1104; GFX10PLUS-LABEL: sample_c_cl_1d:
1105; GFX10PLUS:       ; %bb.0: ; %main_body
1106; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1107; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1108; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1109; GFX10PLUS-NEXT:    image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1110; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1111; GFX10PLUS-NEXT:    ; return to shader part epilog
1112;
1113; GFX12-LABEL: sample_c_cl_1d:
1114; GFX12:       ; %bb.0: ; %main_body
1115; GFX12-NEXT:    s_mov_b32 s12, exec_lo
1116; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
1117; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1118; GFX12-NEXT:    image_sample_c_cl v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1119; GFX12-NEXT:    s_wait_samplecnt 0x0
1120; GFX12-NEXT:    ; return to shader part epilog
1121main_body:
1122  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1123  ret <4 x float> %v
1124}
1125
; 2D sample through the .c.cl variant of the intrinsic, with operands in the
; order %zcompare, %s, %t, %clamp. The checks expect image_sample_c_cl with a
; four-register address (v0 through v3) that overlaps the result registers,
; and, on gfx10 and later, the SQ_RSRC_IMG_2D dim modifier.
1126define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
1127; VERDE-LABEL: sample_c_cl_2d:
1128; VERDE:       ; %bb.0: ; %main_body
1129; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1130; VERDE-NEXT:    s_wqm_b64 exec, exec
1131; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1132; VERDE-NEXT:    image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1133; VERDE-NEXT:    s_waitcnt vmcnt(0)
1134; VERDE-NEXT:    ; return to shader part epilog
1135;
1136; GFX6789-LABEL: sample_c_cl_2d:
1137; GFX6789:       ; %bb.0: ; %main_body
1138; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1139; GFX6789-NEXT:    s_wqm_b64 exec, exec
1140; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1141; GFX6789-NEXT:    image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1142; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1143; GFX6789-NEXT:    ; return to shader part epilog
1144;
1145; GFX10PLUS-LABEL: sample_c_cl_2d:
1146; GFX10PLUS:       ; %bb.0: ; %main_body
1147; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1148; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1149; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1150; GFX10PLUS-NEXT:    image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1151; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1152; GFX10PLUS-NEXT:    ; return to shader part epilog
1153;
1154; GFX12-LABEL: sample_c_cl_2d:
1155; GFX12:       ; %bb.0: ; %main_body
1156; GFX12-NEXT:    s_mov_b32 s12, exec_lo
1157; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
1158; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1159; GFX12-NEXT:    image_sample_c_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1160; GFX12-NEXT:    s_wait_samplecnt 0x0
1161; GFX12-NEXT:    ; return to shader part epilog
1162main_body:
1163  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1164  ret <4 x float> %v
1165}
1166
; 1D sample through the .b variant of the intrinsic, which takes %bias ahead
; of the coordinate %s. The checks expect image_sample_b with a two-register
; address (v0 and v1) and, on gfx10 and later, the SQ_RSRC_IMG_1D dim modifier.
; Review note - the intrinsic name carries two f32 suffixes, presumably one for
; the bias type and one for the coordinate type; confirm against the intrinsic
; definitions.
1167define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) {
1168; VERDE-LABEL: sample_b_1d:
1169; VERDE:       ; %bb.0: ; %main_body
1170; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1171; VERDE-NEXT:    s_wqm_b64 exec, exec
1172; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1173; VERDE-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1174; VERDE-NEXT:    s_waitcnt vmcnt(0)
1175; VERDE-NEXT:    ; return to shader part epilog
1176;
1177; GFX6789-LABEL: sample_b_1d:
1178; GFX6789:       ; %bb.0: ; %main_body
1179; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1180; GFX6789-NEXT:    s_wqm_b64 exec, exec
1181; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1182; GFX6789-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1183; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1184; GFX6789-NEXT:    ; return to shader part epilog
1185;
1186; GFX10PLUS-LABEL: sample_b_1d:
1187; GFX10PLUS:       ; %bb.0: ; %main_body
1188; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1189; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1190; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1191; GFX10PLUS-NEXT:    image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1192; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1193; GFX10PLUS-NEXT:    ; return to shader part epilog
1194;
1195; GFX12-LABEL: sample_b_1d:
1196; GFX12:       ; %bb.0: ; %main_body
1197; GFX12-NEXT:    s_mov_b32 s12, exec_lo
1198; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
1199; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1200; GFX12-NEXT:    image_sample_b v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1201; GFX12-NEXT:    s_wait_samplecnt 0x0
1202; GFX12-NEXT:    ; return to shader part epilog
1203main_body:
1204  %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1205  ret <4 x float> %v
1206}
1207
; 2D sample through the .b variant of the intrinsic, which takes %bias ahead
; of the coordinates %s and %t. The checks expect image_sample_b with a
; three-register address (v0 through v2) and, on gfx10 and later, the
; SQ_RSRC_IMG_2D dim modifier.
1208define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
1209; VERDE-LABEL: sample_b_2d:
1210; VERDE:       ; %bb.0: ; %main_body
1211; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1212; VERDE-NEXT:    s_wqm_b64 exec, exec
1213; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1214; VERDE-NEXT:    image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1215; VERDE-NEXT:    s_waitcnt vmcnt(0)
1216; VERDE-NEXT:    ; return to shader part epilog
1217;
1218; GFX6789-LABEL: sample_b_2d:
1219; GFX6789:       ; %bb.0: ; %main_body
1220; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1221; GFX6789-NEXT:    s_wqm_b64 exec, exec
1222; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1223; GFX6789-NEXT:    image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1224; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1225; GFX6789-NEXT:    ; return to shader part epilog
1226;
1227; GFX10PLUS-LABEL: sample_b_2d:
1228; GFX10PLUS:       ; %bb.0: ; %main_body
1229; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1230; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1231; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1232; GFX10PLUS-NEXT:    image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1233; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1234; GFX10PLUS-NEXT:    ; return to shader part epilog
1235;
1236; GFX12-LABEL: sample_b_2d:
1237; GFX12:       ; %bb.0: ; %main_body
1238; GFX12-NEXT:    s_mov_b32 s12, exec_lo
1239; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
1240; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1241; GFX12-NEXT:    image_sample_b v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1242; GFX12-NEXT:    s_wait_samplecnt 0x0
1243; GFX12-NEXT:    ; return to shader part epilog
1244main_body:
1245  %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1246  ret <4 x float> %v
1247}
1248
; 1D sample through the .c.b variant of the intrinsic, with operands in the
; order %bias, %zcompare, %s. The checks expect image_sample_c_b with a
; three-register address (v0 through v2) and, on gfx10 and later, the
; SQ_RSRC_IMG_1D dim modifier.
1249define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) {
1250; VERDE-LABEL: sample_c_b_1d:
1251; VERDE:       ; %bb.0: ; %main_body
1252; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1253; VERDE-NEXT:    s_wqm_b64 exec, exec
1254; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1255; VERDE-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1256; VERDE-NEXT:    s_waitcnt vmcnt(0)
1257; VERDE-NEXT:    ; return to shader part epilog
1258;
1259; GFX6789-LABEL: sample_c_b_1d:
1260; GFX6789:       ; %bb.0: ; %main_body
1261; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1262; GFX6789-NEXT:    s_wqm_b64 exec, exec
1263; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1264; GFX6789-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1265; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1266; GFX6789-NEXT:    ; return to shader part epilog
1267;
1268; GFX10PLUS-LABEL: sample_c_b_1d:
1269; GFX10PLUS:       ; %bb.0: ; %main_body
1270; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1271; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1272; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1273; GFX10PLUS-NEXT:    image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1274; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1275; GFX10PLUS-NEXT:    ; return to shader part epilog
1276;
1277; GFX12-LABEL: sample_c_b_1d:
1278; GFX12:       ; %bb.0: ; %main_body
1279; GFX12-NEXT:    s_mov_b32 s12, exec_lo
1280; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
1281; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1282; GFX12-NEXT:    image_sample_c_b v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1283; GFX12-NEXT:    s_wait_samplecnt 0x0
1284; GFX12-NEXT:    ; return to shader part epilog
1285main_body:
1286  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1287  ret <4 x float> %v
1288}
1289
; 2D sample through the .c.b variant of the intrinsic, with operands in the
; order %bias, %zcompare, %s, %t. The checks expect image_sample_c_b with a
; four-register address (v0 through v3) that overlaps the result registers,
; and, on gfx10 and later, the SQ_RSRC_IMG_2D dim modifier.
1290define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
1291; VERDE-LABEL: sample_c_b_2d:
1292; VERDE:       ; %bb.0: ; %main_body
1293; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1294; VERDE-NEXT:    s_wqm_b64 exec, exec
1295; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1296; VERDE-NEXT:    image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1297; VERDE-NEXT:    s_waitcnt vmcnt(0)
1298; VERDE-NEXT:    ; return to shader part epilog
1299;
1300; GFX6789-LABEL: sample_c_b_2d:
1301; GFX6789:       ; %bb.0: ; %main_body
1302; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1303; GFX6789-NEXT:    s_wqm_b64 exec, exec
1304; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1305; GFX6789-NEXT:    image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1306; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1307; GFX6789-NEXT:    ; return to shader part epilog
1308;
1309; GFX10PLUS-LABEL: sample_c_b_2d:
1310; GFX10PLUS:       ; %bb.0: ; %main_body
1311; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1312; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1313; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1314; GFX10PLUS-NEXT:    image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1315; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1316; GFX10PLUS-NEXT:    ; return to shader part epilog
1317;
1318; GFX12-LABEL: sample_c_b_2d:
1319; GFX12:       ; %bb.0: ; %main_body
1320; GFX12-NEXT:    s_mov_b32 s12, exec_lo
1321; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
1322; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1323; GFX12-NEXT:    image_sample_c_b v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1324; GFX12-NEXT:    s_wait_samplecnt 0x0
1325; GFX12-NEXT:    ; return to shader part epilog
1326main_body:
1327  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1328  ret <4 x float> %v
1329}
1330
; sample.b.cl.1d with dmask 15 (checked as dmask:0xf). The three address
; operands (%bias, %s, %clamp) are expected in v0-v2: as v[0:2] in the VERDE,
; GFX6789 and GFX10PLUS checks, and as a bracketed register list in the GFX12
; checks, which also wait with s_wait_samplecnt instead of s_waitcnt vmcnt(0).
; Every run line expects the s_mov/s_wqm/s_and exec sequence ahead of the
; sample instruction.
1331define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) {
1332; VERDE-LABEL: sample_b_cl_1d:
1333; VERDE:       ; %bb.0: ; %main_body
1334; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1335; VERDE-NEXT:    s_wqm_b64 exec, exec
1336; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1337; VERDE-NEXT:    image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1338; VERDE-NEXT:    s_waitcnt vmcnt(0)
1339; VERDE-NEXT:    ; return to shader part epilog
1340;
1341; GFX6789-LABEL: sample_b_cl_1d:
1342; GFX6789:       ; %bb.0: ; %main_body
1343; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1344; GFX6789-NEXT:    s_wqm_b64 exec, exec
1345; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1346; GFX6789-NEXT:    image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1347; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1348; GFX6789-NEXT:    ; return to shader part epilog
1349;
1350; GFX10PLUS-LABEL: sample_b_cl_1d:
1351; GFX10PLUS:       ; %bb.0: ; %main_body
1352; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1353; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1354; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1355; GFX10PLUS-NEXT:    image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1356; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1357; GFX10PLUS-NEXT:    ; return to shader part epilog
1358;
1359; GFX12-LABEL: sample_b_cl_1d:
1360; GFX12:       ; %bb.0: ; %main_body
1361; GFX12-NEXT:    s_mov_b32 s12, exec_lo
1362; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
1363; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1364; GFX12-NEXT:    image_sample_b_cl v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1365; GFX12-NEXT:    s_wait_samplecnt 0x0
1366; GFX12-NEXT:    ; return to shader part epilog
1367main_body:
1368  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1369  ret <4 x float> %v
1370}
1371
; sample.b.cl.2d with dmask 15 (checked as dmask:0xf). The four address
; operands (%bias, %s, %t, %clamp) are expected in v0-v3: as v[0:3] in the
; VERDE, GFX6789 and GFX10PLUS checks, and as a bracketed register list in the
; GFX12 checks, which also wait with s_wait_samplecnt instead of
; s_waitcnt vmcnt(0). Every run line expects the s_mov/s_wqm/s_and exec
; sequence ahead of the sample instruction.
1372define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
1373; VERDE-LABEL: sample_b_cl_2d:
1374; VERDE:       ; %bb.0: ; %main_body
1375; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1376; VERDE-NEXT:    s_wqm_b64 exec, exec
1377; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1378; VERDE-NEXT:    image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1379; VERDE-NEXT:    s_waitcnt vmcnt(0)
1380; VERDE-NEXT:    ; return to shader part epilog
1381;
1382; GFX6789-LABEL: sample_b_cl_2d:
1383; GFX6789:       ; %bb.0: ; %main_body
1384; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1385; GFX6789-NEXT:    s_wqm_b64 exec, exec
1386; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1387; GFX6789-NEXT:    image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1388; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1389; GFX6789-NEXT:    ; return to shader part epilog
1390;
1391; GFX10PLUS-LABEL: sample_b_cl_2d:
1392; GFX10PLUS:       ; %bb.0: ; %main_body
1393; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1394; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1395; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1396; GFX10PLUS-NEXT:    image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1397; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1398; GFX10PLUS-NEXT:    ; return to shader part epilog
1399;
1400; GFX12-LABEL: sample_b_cl_2d:
1401; GFX12:       ; %bb.0: ; %main_body
1402; GFX12-NEXT:    s_mov_b32 s12, exec_lo
1403; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
1404; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1405; GFX12-NEXT:    image_sample_b_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1406; GFX12-NEXT:    s_wait_samplecnt 0x0
1407; GFX12-NEXT:    ; return to shader part epilog
1408main_body:
1409  %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1410  ret <4 x float> %v
1411}
1412
; sample.c.b.cl.1d with dmask 15 (checked as dmask:0xf). The four address
; operands (%bias, %zcompare, %s, %clamp) are expected in v0-v3: as v[0:3] in
; the VERDE, GFX6789 and GFX10PLUS checks, and as a bracketed register list in
; the GFX12 checks, which also wait with s_wait_samplecnt instead of
; s_waitcnt vmcnt(0). Every run line expects the s_mov/s_wqm/s_and exec
; sequence ahead of the sample instruction.
1413define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) {
1414; VERDE-LABEL: sample_c_b_cl_1d:
1415; VERDE:       ; %bb.0: ; %main_body
1416; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1417; VERDE-NEXT:    s_wqm_b64 exec, exec
1418; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1419; VERDE-NEXT:    image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1420; VERDE-NEXT:    s_waitcnt vmcnt(0)
1421; VERDE-NEXT:    ; return to shader part epilog
1422;
1423; GFX6789-LABEL: sample_c_b_cl_1d:
1424; GFX6789:       ; %bb.0: ; %main_body
1425; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1426; GFX6789-NEXT:    s_wqm_b64 exec, exec
1427; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1428; GFX6789-NEXT:    image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1429; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1430; GFX6789-NEXT:    ; return to shader part epilog
1431;
1432; GFX10PLUS-LABEL: sample_c_b_cl_1d:
1433; GFX10PLUS:       ; %bb.0: ; %main_body
1434; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1435; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1436; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1437; GFX10PLUS-NEXT:    image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1438; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1439; GFX10PLUS-NEXT:    ; return to shader part epilog
1440;
1441; GFX12-LABEL: sample_c_b_cl_1d:
1442; GFX12:       ; %bb.0: ; %main_body
1443; GFX12-NEXT:    s_mov_b32 s12, exec_lo
1444; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
1445; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1446; GFX12-NEXT:    image_sample_c_b_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1447; GFX12-NEXT:    s_wait_samplecnt 0x0
1448; GFX12-NEXT:    ; return to shader part epilog
1449main_body:
1450  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1451  ret <4 x float> %v
1452}
1453
; sample.c.b.cl.2d with dmask 15 (checked as dmask:0xf). The five address
; operands (%bias, %zcompare, %s, %t, %clamp) are expected in v0-v4: as v[0:4]
; in the VERDE, GFX6789 and GFX10PLUS checks. The GFX12 checks expect a
; bracketed list whose last entry is the range v[3:4], and wait with
; s_wait_samplecnt instead of s_waitcnt vmcnt(0). Every run line expects the
; s_mov/s_wqm/s_and exec sequence ahead of the sample instruction.
1454define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
1455; VERDE-LABEL: sample_c_b_cl_2d:
1456; VERDE:       ; %bb.0: ; %main_body
1457; VERDE-NEXT:    s_mov_b64 s[12:13], exec
1458; VERDE-NEXT:    s_wqm_b64 exec, exec
1459; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
1460; VERDE-NEXT:    image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
1461; VERDE-NEXT:    s_waitcnt vmcnt(0)
1462; VERDE-NEXT:    ; return to shader part epilog
1463;
1464; GFX6789-LABEL: sample_c_b_cl_2d:
1465; GFX6789:       ; %bb.0: ; %main_body
1466; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
1467; GFX6789-NEXT:    s_wqm_b64 exec, exec
1468; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
1469; GFX6789-NEXT:    image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
1470; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1471; GFX6789-NEXT:    ; return to shader part epilog
1472;
1473; GFX10PLUS-LABEL: sample_c_b_cl_2d:
1474; GFX10PLUS:       ; %bb.0: ; %main_body
1475; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
1476; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
1477; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1478; GFX10PLUS-NEXT:    image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1479; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1480; GFX10PLUS-NEXT:    ; return to shader part epilog
1481;
1482; GFX12-LABEL: sample_c_b_cl_2d:
1483; GFX12:       ; %bb.0: ; %main_body
1484; GFX12-NEXT:    s_mov_b32 s12, exec_lo
1485; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
1486; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
1487; GFX12-NEXT:    image_sample_c_b_cl v[0:3], [v0, v1, v2, v[3:4]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1488; GFX12-NEXT:    s_wait_samplecnt 0x0
1489; GFX12-NEXT:    ; return to shader part epilog
1490main_body:
1491  %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1492  ret <4 x float> %v
1493}
1494
; sample.d.1d with dmask 15 (checked as dmask:0xf). The three address operands
; (%dsdh, %dsdv, %s) are expected in v0-v2: as v[0:2] in the VERDE, GFX6789 and
; GFX10PLUS checks, and as a bracketed register list in the GFX12 checks, which
; also wait with s_wait_samplecnt instead of s_waitcnt vmcnt(0). The checks
; expect the sample to be the first instruction, with no exec manipulation
; before it.
1495define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) {
1496; VERDE-LABEL: sample_d_1d:
1497; VERDE:       ; %bb.0: ; %main_body
1498; VERDE-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1499; VERDE-NEXT:    s_waitcnt vmcnt(0)
1500; VERDE-NEXT:    ; return to shader part epilog
1501;
1502; GFX6789-LABEL: sample_d_1d:
1503; GFX6789:       ; %bb.0: ; %main_body
1504; GFX6789-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1505; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1506; GFX6789-NEXT:    ; return to shader part epilog
1507;
1508; GFX10PLUS-LABEL: sample_d_1d:
1509; GFX10PLUS:       ; %bb.0: ; %main_body
1510; GFX10PLUS-NEXT:    image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1511; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1512; GFX10PLUS-NEXT:    ; return to shader part epilog
1513;
1514; GFX12-LABEL: sample_d_1d:
1515; GFX12:       ; %bb.0: ; %main_body
1516; GFX12-NEXT:    image_sample_d v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1517; GFX12-NEXT:    s_wait_samplecnt 0x0
1518; GFX12-NEXT:    ; return to shader part epilog
1519main_body:
1520  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1521  ret <4 x float> %v
1522}
1523
; sample.d.2d with dmask 15 (checked as dmask:0xf). The six address operands
; (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t) are expected in v0-v5: as v[0:5] in the
; VERDE, GFX6789 and GFX10PLUS checks. The GFX12 checks expect a bracketed list
; whose last entry is the range v[3:5], and wait with s_wait_samplecnt instead
; of s_waitcnt vmcnt(0). The checks expect the sample to be the first
; instruction, with no exec manipulation before it.
1524define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
1525; VERDE-LABEL: sample_d_2d:
1526; VERDE:       ; %bb.0: ; %main_body
1527; VERDE-NEXT:    image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
1528; VERDE-NEXT:    s_waitcnt vmcnt(0)
1529; VERDE-NEXT:    ; return to shader part epilog
1530;
1531; GFX6789-LABEL: sample_d_2d:
1532; GFX6789:       ; %bb.0: ; %main_body
1533; GFX6789-NEXT:    image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf
1534; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1535; GFX6789-NEXT:    ; return to shader part epilog
1536;
1537; GFX10PLUS-LABEL: sample_d_2d:
1538; GFX10PLUS:       ; %bb.0: ; %main_body
1539; GFX10PLUS-NEXT:    image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1540; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1541; GFX10PLUS-NEXT:    ; return to shader part epilog
1542;
1543; GFX12-LABEL: sample_d_2d:
1544; GFX12:       ; %bb.0: ; %main_body
1545; GFX12-NEXT:    image_sample_d v[0:3], [v0, v1, v2, v[3:5]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1546; GFX12-NEXT:    s_wait_samplecnt 0x0
1547; GFX12-NEXT:    ; return to shader part epilog
1548main_body:
1549  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1550  ret <4 x float> %v
1551}
1552
; sample.c.d.1d with dmask 15 (checked as dmask:0xf). The four address operands
; (%zcompare, %dsdh, %dsdv, %s) are expected in v0-v3: as v[0:3] in the VERDE,
; GFX6789 and GFX10PLUS checks, and as a bracketed register list in the GFX12
; checks, which also wait with s_wait_samplecnt instead of s_waitcnt vmcnt(0).
; The checks expect the sample to be the first instruction, with no exec
; manipulation before it.
1553define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) {
1554; VERDE-LABEL: sample_c_d_1d:
1555; VERDE:       ; %bb.0: ; %main_body
1556; VERDE-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1557; VERDE-NEXT:    s_waitcnt vmcnt(0)
1558; VERDE-NEXT:    ; return to shader part epilog
1559;
1560; GFX6789-LABEL: sample_c_d_1d:
1561; GFX6789:       ; %bb.0: ; %main_body
1562; GFX6789-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1563; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1564; GFX6789-NEXT:    ; return to shader part epilog
1565;
1566; GFX10PLUS-LABEL: sample_c_d_1d:
1567; GFX10PLUS:       ; %bb.0: ; %main_body
1568; GFX10PLUS-NEXT:    image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1569; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1570; GFX10PLUS-NEXT:    ; return to shader part epilog
1571;
1572; GFX12-LABEL: sample_c_d_1d:
1573; GFX12:       ; %bb.0: ; %main_body
1574; GFX12-NEXT:    image_sample_c_d v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1575; GFX12-NEXT:    s_wait_samplecnt 0x0
1576; GFX12-NEXT:    ; return to shader part epilog
1577main_body:
1578  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1579  ret <4 x float> %v
1580}
1581
; sample.c.d.2d with dmask 15 (checked as dmask:0xf). The seven address
; operands (%zcompare, %dsdh, %dtdh, %dsdv, %dtdv, %s, %t) are expected in
; v0-v6: as v[0:6] in the VERDE, GFX6789 and GFX10PLUS checks. The GFX12 checks
; expect a bracketed list whose last entry is the range v[3:6], and wait with
; s_wait_samplecnt instead of s_waitcnt vmcnt(0). The checks expect the sample
; to be the first instruction, with no exec manipulation before it.
1582define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) {
1583; VERDE-LABEL: sample_c_d_2d:
1584; VERDE:       ; %bb.0: ; %main_body
1585; VERDE-NEXT:    image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
1586; VERDE-NEXT:    s_waitcnt vmcnt(0)
1587; VERDE-NEXT:    ; return to shader part epilog
1588;
1589; GFX6789-LABEL: sample_c_d_2d:
1590; GFX6789:       ; %bb.0: ; %main_body
1591; GFX6789-NEXT:    image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
1592; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1593; GFX6789-NEXT:    ; return to shader part epilog
1594;
1595; GFX10PLUS-LABEL: sample_c_d_2d:
1596; GFX10PLUS:       ; %bb.0: ; %main_body
1597; GFX10PLUS-NEXT:    image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1598; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1599; GFX10PLUS-NEXT:    ; return to shader part epilog
1600;
1601; GFX12-LABEL: sample_c_d_2d:
1602; GFX12:       ; %bb.0: ; %main_body
1603; GFX12-NEXT:    image_sample_c_d v[0:3], [v0, v1, v2, v[3:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1604; GFX12-NEXT:    s_wait_samplecnt 0x0
1605; GFX12-NEXT:    ; return to shader part epilog
1606main_body:
1607  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1608  ret <4 x float> %v
1609}
1610
; sample.d.cl.1d with dmask 15 (checked as dmask:0xf). The four address
; operands (%dsdh, %dsdv, %s, %clamp) are expected in v0-v3: as v[0:3] in the
; VERDE, GFX6789 and GFX10PLUS checks, and as a bracketed register list in the
; GFX12 checks, which also wait with s_wait_samplecnt instead of
; s_waitcnt vmcnt(0). The checks expect the sample to be the first instruction,
; with no exec manipulation before it.
1611define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) {
1612; VERDE-LABEL: sample_d_cl_1d:
1613; VERDE:       ; %bb.0: ; %main_body
1614; VERDE-NEXT:    image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1615; VERDE-NEXT:    s_waitcnt vmcnt(0)
1616; VERDE-NEXT:    ; return to shader part epilog
1617;
1618; GFX6789-LABEL: sample_d_cl_1d:
1619; GFX6789:       ; %bb.0: ; %main_body
1620; GFX6789-NEXT:    image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1621; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1622; GFX6789-NEXT:    ; return to shader part epilog
1623;
1624; GFX10PLUS-LABEL: sample_d_cl_1d:
1625; GFX10PLUS:       ; %bb.0: ; %main_body
1626; GFX10PLUS-NEXT:    image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1627; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1628; GFX10PLUS-NEXT:    ; return to shader part epilog
1629;
1630; GFX12-LABEL: sample_d_cl_1d:
1631; GFX12:       ; %bb.0: ; %main_body
1632; GFX12-NEXT:    image_sample_d_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1633; GFX12-NEXT:    s_wait_samplecnt 0x0
1634; GFX12-NEXT:    ; return to shader part epilog
1635main_body:
1636  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1637  ret <4 x float> %v
1638}
1639
; sample.d.cl.2d with dmask 15 (checked as dmask:0xf). The seven address
; operands (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp) are expected in v0-v6:
; as v[0:6] in the VERDE, GFX6789 and GFX10PLUS checks. The GFX12 checks expect
; a bracketed list whose last entry is the range v[3:6], and wait with
; s_wait_samplecnt instead of s_waitcnt vmcnt(0). The checks expect the sample
; to be the first instruction, with no exec manipulation before it.
1640define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
1641; VERDE-LABEL: sample_d_cl_2d:
1642; VERDE:       ; %bb.0: ; %main_body
1643; VERDE-NEXT:    image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
1644; VERDE-NEXT:    s_waitcnt vmcnt(0)
1645; VERDE-NEXT:    ; return to shader part epilog
1646;
1647; GFX6789-LABEL: sample_d_cl_2d:
1648; GFX6789:       ; %bb.0: ; %main_body
1649; GFX6789-NEXT:    image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf
1650; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1651; GFX6789-NEXT:    ; return to shader part epilog
1652;
1653; GFX10PLUS-LABEL: sample_d_cl_2d:
1654; GFX10PLUS:       ; %bb.0: ; %main_body
1655; GFX10PLUS-NEXT:    image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1656; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1657; GFX10PLUS-NEXT:    ; return to shader part epilog
1658;
1659; GFX12-LABEL: sample_d_cl_2d:
1660; GFX12:       ; %bb.0: ; %main_body
1661; GFX12-NEXT:    image_sample_d_cl v[0:3], [v0, v1, v2, v[3:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1662; GFX12-NEXT:    s_wait_samplecnt 0x0
1663; GFX12-NEXT:    ; return to shader part epilog
1664main_body:
1665  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1666  ret <4 x float> %v
1667}
1668
; sample.c.d.cl.1d with dmask 15 (checked as dmask:0xf). The five address
; operands (%zcompare, %dsdh, %dsdv, %s, %clamp) are expected in v0-v4: as
; v[0:4] in the VERDE, GFX6789 and GFX10PLUS checks. The GFX12 checks expect a
; bracketed list whose last entry is the range v[3:4], and wait with
; s_wait_samplecnt instead of s_waitcnt vmcnt(0). The checks expect the sample
; to be the first instruction, with no exec manipulation before it.
1669define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) {
1670; VERDE-LABEL: sample_c_d_cl_1d:
1671; VERDE:       ; %bb.0: ; %main_body
1672; VERDE-NEXT:    image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
1673; VERDE-NEXT:    s_waitcnt vmcnt(0)
1674; VERDE-NEXT:    ; return to shader part epilog
1675;
1676; GFX6789-LABEL: sample_c_d_cl_1d:
1677; GFX6789:       ; %bb.0: ; %main_body
1678; GFX6789-NEXT:    image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf
1679; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1680; GFX6789-NEXT:    ; return to shader part epilog
1681;
1682; GFX10PLUS-LABEL: sample_c_d_cl_1d:
1683; GFX10PLUS:       ; %bb.0: ; %main_body
1684; GFX10PLUS-NEXT:    image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1685; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1686; GFX10PLUS-NEXT:    ; return to shader part epilog
1687;
1688; GFX12-LABEL: sample_c_d_cl_1d:
1689; GFX12:       ; %bb.0: ; %main_body
1690; GFX12-NEXT:    image_sample_c_d_cl v[0:3], [v0, v1, v2, v[3:4]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1691; GFX12-NEXT:    s_wait_samplecnt 0x0
1692; GFX12-NEXT:    ; return to shader part epilog
1693main_body:
1694  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1695  ret <4 x float> %v
1696}
1697
; sample.c.d.cl.2d with dmask 15 (checked as dmask:0xf). The eight address
; operands (%zcompare, %dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp) are expected
; in v0-v7: as v[0:7] in the VERDE, GFX6789 and GFX10PLUS checks. The GFX12
; checks expect a bracketed list whose last entry is the range v[3:7], and wait
; with s_wait_samplecnt instead of s_waitcnt vmcnt(0). The checks expect the
; sample to be the first instruction, with no exec manipulation before it.
1698define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) {
1699; VERDE-LABEL: sample_c_d_cl_2d:
1700; VERDE:       ; %bb.0: ; %main_body
1701; VERDE-NEXT:    image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
1702; VERDE-NEXT:    s_waitcnt vmcnt(0)
1703; VERDE-NEXT:    ; return to shader part epilog
1704;
1705; GFX6789-LABEL: sample_c_d_cl_2d:
1706; GFX6789:       ; %bb.0: ; %main_body
1707; GFX6789-NEXT:    image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf
1708; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1709; GFX6789-NEXT:    ; return to shader part epilog
1710;
1711; GFX10PLUS-LABEL: sample_c_d_cl_2d:
1712; GFX10PLUS:       ; %bb.0: ; %main_body
1713; GFX10PLUS-NEXT:    image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1714; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1715; GFX10PLUS-NEXT:    ; return to shader part epilog
1716;
1717; GFX12-LABEL: sample_c_d_cl_2d:
1718; GFX12:       ; %bb.0: ; %main_body
1719; GFX12-NEXT:    image_sample_c_d_cl v[0:3], [v0, v1, v2, v[3:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1720; GFX12-NEXT:    s_wait_samplecnt 0x0
1721; GFX12-NEXT:    ; return to shader part epilog
1722main_body:
1723  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1724  ret <4 x float> %v
1725}
1726
; sample.l.1d with dmask 15 (checked as dmask:0xf). The two address operands
; (%s, %lod) are expected in v0-v1: as v[0:1] in the VERDE, GFX6789 and
; GFX10PLUS checks, and as a bracketed register list in the GFX12 checks, which
; also wait with s_wait_samplecnt instead of s_waitcnt vmcnt(0). The checks
; expect the sample to be the first instruction, with no exec manipulation
; before it.
1727define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) {
1728; VERDE-LABEL: sample_l_1d:
1729; VERDE:       ; %bb.0: ; %main_body
1730; VERDE-NEXT:    image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1731; VERDE-NEXT:    s_waitcnt vmcnt(0)
1732; VERDE-NEXT:    ; return to shader part epilog
1733;
1734; GFX6789-LABEL: sample_l_1d:
1735; GFX6789:       ; %bb.0: ; %main_body
1736; GFX6789-NEXT:    image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1737; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1738; GFX6789-NEXT:    ; return to shader part epilog
1739;
1740; GFX10PLUS-LABEL: sample_l_1d:
1741; GFX10PLUS:       ; %bb.0: ; %main_body
1742; GFX10PLUS-NEXT:    image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1743; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1744; GFX10PLUS-NEXT:    ; return to shader part epilog
1745;
1746; GFX12-LABEL: sample_l_1d:
1747; GFX12:       ; %bb.0: ; %main_body
1748; GFX12-NEXT:    image_sample_l v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1749; GFX12-NEXT:    s_wait_samplecnt 0x0
1750; GFX12-NEXT:    ; return to shader part epilog
1751main_body:
1752  %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1753  ret <4 x float> %v
1754}
1755
; sample.l.2d with dmask 15 (checked as dmask:0xf). The three address operands
; (%s, %t, %lod) are expected in v0-v2: as v[0:2] in the VERDE, GFX6789 and
; GFX10PLUS checks, and as a bracketed register list in the GFX12 checks, which
; also wait with s_wait_samplecnt instead of s_waitcnt vmcnt(0). The checks
; expect the sample to be the first instruction, with no exec manipulation
; before it.
1756define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
1757; VERDE-LABEL: sample_l_2d:
1758; VERDE:       ; %bb.0: ; %main_body
1759; VERDE-NEXT:    image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1760; VERDE-NEXT:    s_waitcnt vmcnt(0)
1761; VERDE-NEXT:    ; return to shader part epilog
1762;
1763; GFX6789-LABEL: sample_l_2d:
1764; GFX6789:       ; %bb.0: ; %main_body
1765; GFX6789-NEXT:    image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1766; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1767; GFX6789-NEXT:    ; return to shader part epilog
1768;
1769; GFX10PLUS-LABEL: sample_l_2d:
1770; GFX10PLUS:       ; %bb.0: ; %main_body
1771; GFX10PLUS-NEXT:    image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1772; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1773; GFX10PLUS-NEXT:    ; return to shader part epilog
1774;
1775; GFX12-LABEL: sample_l_2d:
1776; GFX12:       ; %bb.0: ; %main_body
1777; GFX12-NEXT:    image_sample_l v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1778; GFX12-NEXT:    s_wait_samplecnt 0x0
1779; GFX12-NEXT:    ; return to shader part epilog
1780main_body:
1781  %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1782  ret <4 x float> %v
1783}
1784
; sample.c.l.1d with dmask 15 (checked as dmask:0xf). The three address
; operands (%zcompare, %s, %lod) are expected in v0-v2: as v[0:2] in the VERDE,
; GFX6789 and GFX10PLUS checks, and as a bracketed register list in the GFX12
; checks, which also wait with s_wait_samplecnt instead of s_waitcnt vmcnt(0).
; The checks expect the sample to be the first instruction, with no exec
; manipulation before it.
1785define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) {
1786; VERDE-LABEL: sample_c_l_1d:
1787; VERDE:       ; %bb.0: ; %main_body
1788; VERDE-NEXT:    image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1789; VERDE-NEXT:    s_waitcnt vmcnt(0)
1790; VERDE-NEXT:    ; return to shader part epilog
1791;
1792; GFX6789-LABEL: sample_c_l_1d:
1793; GFX6789:       ; %bb.0: ; %main_body
1794; GFX6789-NEXT:    image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1795; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1796; GFX6789-NEXT:    ; return to shader part epilog
1797;
1798; GFX10PLUS-LABEL: sample_c_l_1d:
1799; GFX10PLUS:       ; %bb.0: ; %main_body
1800; GFX10PLUS-NEXT:    image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1801; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1802; GFX10PLUS-NEXT:    ; return to shader part epilog
1803;
1804; GFX12-LABEL: sample_c_l_1d:
1805; GFX12:       ; %bb.0: ; %main_body
1806; GFX12-NEXT:    image_sample_c_l v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1807; GFX12-NEXT:    s_wait_samplecnt 0x0
1808; GFX12-NEXT:    ; return to shader part epilog
1809main_body:
1810  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1811  ret <4 x float> %v
1812}
1813
; sample.c.l.2d with dmask 15 (checked as dmask:0xf). The four address operands
; (%zcompare, %s, %t, %lod) are expected in v0-v3: as v[0:3] in the VERDE,
; GFX6789 and GFX10PLUS checks, and as a bracketed register list in the GFX12
; checks, which also wait with s_wait_samplecnt instead of s_waitcnt vmcnt(0).
; The checks expect the sample to be the first instruction, with no exec
; manipulation before it.
1814define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
1815; VERDE-LABEL: sample_c_l_2d:
1816; VERDE:       ; %bb.0: ; %main_body
1817; VERDE-NEXT:    image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1818; VERDE-NEXT:    s_waitcnt vmcnt(0)
1819; VERDE-NEXT:    ; return to shader part epilog
1820;
1821; GFX6789-LABEL: sample_c_l_2d:
1822; GFX6789:       ; %bb.0: ; %main_body
1823; GFX6789-NEXT:    image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf
1824; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1825; GFX6789-NEXT:    ; return to shader part epilog
1826;
1827; GFX10PLUS-LABEL: sample_c_l_2d:
1828; GFX10PLUS:       ; %bb.0: ; %main_body
1829; GFX10PLUS-NEXT:    image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1830; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1831; GFX10PLUS-NEXT:    ; return to shader part epilog
1832;
1833; GFX12-LABEL: sample_c_l_2d:
1834; GFX12:       ; %bb.0: ; %main_body
1835; GFX12-NEXT:    image_sample_c_l v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1836; GFX12-NEXT:    s_wait_samplecnt 0x0
1837; GFX12-NEXT:    ; return to shader part epilog
1838main_body:
1839  %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1840  ret <4 x float> %v
1841}
1842
; sample.lz.1d with dmask 15 (checked as dmask:0xf). The single address operand
; %s is expected as plain v0 on every run line, including the GFX12 checks (no
; bracketed list for a single register). The GFX12 checks wait with
; s_wait_samplecnt instead of s_waitcnt vmcnt(0). The checks expect the sample
; to be the first instruction, with no exec manipulation before it.
1843define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
1844; VERDE-LABEL: sample_lz_1d:
1845; VERDE:       ; %bb.0: ; %main_body
1846; VERDE-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
1847; VERDE-NEXT:    s_waitcnt vmcnt(0)
1848; VERDE-NEXT:    ; return to shader part epilog
1849;
1850; GFX6789-LABEL: sample_lz_1d:
1851; GFX6789:       ; %bb.0: ; %main_body
1852; GFX6789-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf
1853; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1854; GFX6789-NEXT:    ; return to shader part epilog
1855;
1856; GFX10PLUS-LABEL: sample_lz_1d:
1857; GFX10PLUS:       ; %bb.0: ; %main_body
1858; GFX10PLUS-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1859; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1860; GFX10PLUS-NEXT:    ; return to shader part epilog
1861;
1862; GFX12-LABEL: sample_lz_1d:
1863; GFX12:       ; %bb.0: ; %main_body
1864; GFX12-NEXT:    image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1865; GFX12-NEXT:    s_wait_samplecnt 0x0
1866; GFX12-NEXT:    ; return to shader part epilog
1867main_body:
1868  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1869  ret <4 x float> %v
1870}
1871
; sample.lz.2d with dmask 15 (checked as dmask:0xf). The two address operands
; (%s, %t) are expected in v0-v1: as v[0:1] in the VERDE, GFX6789 and GFX10PLUS
; checks, and as a bracketed register list in the GFX12 checks, which also wait
; with s_wait_samplecnt instead of s_waitcnt vmcnt(0). The checks expect the
; sample to be the first instruction, with no exec manipulation before it.
1872define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
1873; VERDE-LABEL: sample_lz_2d:
1874; VERDE:       ; %bb.0: ; %main_body
1875; VERDE-NEXT:    image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1876; VERDE-NEXT:    s_waitcnt vmcnt(0)
1877; VERDE-NEXT:    ; return to shader part epilog
1878;
1879; GFX6789-LABEL: sample_lz_2d:
1880; GFX6789:       ; %bb.0: ; %main_body
1881; GFX6789-NEXT:    image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1882; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1883; GFX6789-NEXT:    ; return to shader part epilog
1884;
1885; GFX10PLUS-LABEL: sample_lz_2d:
1886; GFX10PLUS:       ; %bb.0: ; %main_body
1887; GFX10PLUS-NEXT:    image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1888; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1889; GFX10PLUS-NEXT:    ; return to shader part epilog
1890;
1891; GFX12-LABEL: sample_lz_2d:
1892; GFX12:       ; %bb.0: ; %main_body
1893; GFX12-NEXT:    image_sample_lz v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1894; GFX12-NEXT:    s_wait_samplecnt 0x0
1895; GFX12-NEXT:    ; return to shader part epilog
1896main_body:
1897  %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1898  ret <4 x float> %v
1899}
1900
; sample.c.lz.1d with dmask 15 (checked as dmask:0xf). The two address operands
; (%zcompare, %s) are expected in v0-v1: as v[0:1] in the VERDE, GFX6789 and
; GFX10PLUS checks, and as a bracketed register list in the GFX12 checks, which
; also wait with s_wait_samplecnt instead of s_waitcnt vmcnt(0). The checks
; expect the sample to be the first instruction, with no exec manipulation
; before it.
1901define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) {
1902; VERDE-LABEL: sample_c_lz_1d:
1903; VERDE:       ; %bb.0: ; %main_body
1904; VERDE-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1905; VERDE-NEXT:    s_waitcnt vmcnt(0)
1906; VERDE-NEXT:    ; return to shader part epilog
1907;
1908; GFX6789-LABEL: sample_c_lz_1d:
1909; GFX6789:       ; %bb.0: ; %main_body
1910; GFX6789-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf
1911; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1912; GFX6789-NEXT:    ; return to shader part epilog
1913;
1914; GFX10PLUS-LABEL: sample_c_lz_1d:
1915; GFX10PLUS:       ; %bb.0: ; %main_body
1916; GFX10PLUS-NEXT:    image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1917; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1918; GFX10PLUS-NEXT:    ; return to shader part epilog
1919;
1920; GFX12-LABEL: sample_c_lz_1d:
1921; GFX12:       ; %bb.0: ; %main_body
1922; GFX12-NEXT:    image_sample_c_lz v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
1923; GFX12-NEXT:    s_wait_samplecnt 0x0
1924; GFX12-NEXT:    ; return to shader part epilog
1925main_body:
1926  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1927  ret <4 x float> %v
1928}
1929
; sample.c.lz.2d with dmask 15 (checked as dmask:0xf). The three address
; operands (%zcompare, %s, %t) are expected in v0-v2: as v[0:2] in the VERDE,
; GFX6789 and GFX10PLUS checks, and as a bracketed register list in the GFX12
; checks, which also wait with s_wait_samplecnt instead of s_waitcnt vmcnt(0).
; The checks expect the sample to be the first instruction, with no exec
; manipulation before it.
1930define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
1931; VERDE-LABEL: sample_c_lz_2d:
1932; VERDE:       ; %bb.0: ; %main_body
1933; VERDE-NEXT:    image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1934; VERDE-NEXT:    s_waitcnt vmcnt(0)
1935; VERDE-NEXT:    ; return to shader part epilog
1936;
1937; GFX6789-LABEL: sample_c_lz_2d:
1938; GFX6789:       ; %bb.0: ; %main_body
1939; GFX6789-NEXT:    image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf
1940; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1941; GFX6789-NEXT:    ; return to shader part epilog
1942;
1943; GFX10PLUS-LABEL: sample_c_lz_2d:
1944; GFX10PLUS:       ; %bb.0: ; %main_body
1945; GFX10PLUS-NEXT:    image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1946; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1947; GFX10PLUS-NEXT:    ; return to shader part epilog
1948;
1949; GFX12-LABEL: sample_c_lz_2d:
1950; GFX12:       ; %bb.0: ; %main_body
1951; GFX12-NEXT:    image_sample_c_lz v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
1952; GFX12-NEXT:    s_wait_samplecnt 0x0
1953; GFX12-NEXT:    ; return to shader part epilog
1954main_body:
1955  %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1956  ret <4 x float> %v
1957}
1958
; Test: c.d.o sample on a 2D array returning a single float with dmask i32 4.
; The nine VGPR operands (offset, zcompare, four derivatives, s, t, slice) are
; expected as v[0:8] on the pre-gfx12 run lines and as a bracketed register
; list on the gfx12 run line; the one enabled channel is expected in v0.
; Pre-gfx10 output carries the da modifier, while gfx10 and later print
; dim:SQ_RSRC_IMG_2D_ARRAY instead.
1959define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
1960; VERDE-LABEL: sample_c_d_o_2darray_V1:
1961; VERDE:       ; %bb.0: ; %main_body
1962; VERDE-NEXT:    image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 da
1963; VERDE-NEXT:    s_waitcnt vmcnt(0)
1964; VERDE-NEXT:    ; return to shader part epilog
1965;
1966; GFX6789-LABEL: sample_c_d_o_2darray_V1:
1967; GFX6789:       ; %bb.0: ; %main_body
1968; GFX6789-NEXT:    image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 da
1969; GFX6789-NEXT:    s_waitcnt vmcnt(0)
1970; GFX6789-NEXT:    ; return to shader part epilog
1971;
1972; GFX10PLUS-LABEL: sample_c_d_o_2darray_V1:
1973; GFX10PLUS:       ; %bb.0: ; %main_body
1974; GFX10PLUS-NEXT:    image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
1975; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
1976; GFX10PLUS-NEXT:    ; return to shader part epilog
1977;
1978; GFX12-LABEL: sample_c_d_o_2darray_V1:
1979; GFX12:       ; %bb.0: ; %main_body
1980; GFX12-NEXT:    image_sample_c_d_o v0, [v0, v1, v2, v[3:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
1981; GFX12-NEXT:    s_wait_samplecnt 0x0
1982; GFX12-NEXT:    ; return to shader part epilog
1983main_body:
1984  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
1985  ret float %v
1986}
1987
; Test: c.d.o sample on a 2D array with a {float, i32} return, dmask i32 4 and
; the second-to-last i32 operand set to 1; every run line is expected to print
; the tfe modifier. The checks expect the two-register destination to be
; written with zero before the sample is issued. After the sample, the i32
; member is stored through %out (buffer_store_dword on the verde run line,
; a global store elsewhere) and the float member is returned in v0.
1988define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, ptr addrspace(1) inreg %out) {
1989; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe:
1990; VERDE:       ; %bb.0: ; %main_body
1991; VERDE-NEXT:    v_mov_b32_e32 v9, 0
1992; VERDE-NEXT:    v_mov_b32_e32 v10, v9
1993; VERDE-NEXT:    image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 tfe da
1994; VERDE-NEXT:    s_mov_b32 s15, 0xf000
1995; VERDE-NEXT:    s_mov_b32 s14, -1
1996; VERDE-NEXT:    s_waitcnt vmcnt(0)
1997; VERDE-NEXT:    v_mov_b32_e32 v0, v9
1998; VERDE-NEXT:    buffer_store_dword v10, off, s[12:15], 0
1999; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
2000; VERDE-NEXT:    ; return to shader part epilog
2001;
2002; GFX6789-LABEL: sample_c_d_o_2darray_V1_tfe:
2003; GFX6789:       ; %bb.0: ; %main_body
2004; GFX6789-NEXT:    v_mov_b32_e32 v11, 0
2005; GFX6789-NEXT:    v_mov_b32_e32 v12, v11
2006; GFX6789-NEXT:    v_mov_b32_e32 v9, v11
2007; GFX6789-NEXT:    v_mov_b32_e32 v10, v12
2008; GFX6789-NEXT:    image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 tfe da
2009; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2010; GFX6789-NEXT:    v_mov_b32_e32 v0, v9
2011; GFX6789-NEXT:    global_store_dword v11, v10, s[12:13]
2012; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2013; GFX6789-NEXT:    ; return to shader part epilog
2014;
2015; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe:
2016; GFX10:       ; %bb.0: ; %main_body
2017; GFX10-NEXT:    v_mov_b32_e32 v11, 0
2018; GFX10-NEXT:    v_mov_b32_e32 v12, v11
2019; GFX10-NEXT:    v_mov_b32_e32 v9, v11
2020; GFX10-NEXT:    v_mov_b32_e32 v10, v12
2021; GFX10-NEXT:    image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
2022; GFX10-NEXT:    s_waitcnt vmcnt(0)
2023; GFX10-NEXT:    v_mov_b32_e32 v0, v9
2024; GFX10-NEXT:    global_store_dword v11, v10, s[12:13]
2025; GFX10-NEXT:    ; return to shader part epilog
2026;
2027; GFX11-LABEL: sample_c_d_o_2darray_V1_tfe:
2028; GFX11:       ; %bb.0: ; %main_body
2029; GFX11-NEXT:    v_mov_b32_e32 v11, 0
2030; GFX11-NEXT:    v_dual_mov_b32 v9, v1 :: v_dual_mov_b32 v10, v0
2031; GFX11-NEXT:    v_mov_b32_e32 v12, v11
2032; GFX11-NEXT:    v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v12
2033; GFX11-NEXT:    image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v[4:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
2034; GFX11-NEXT:    s_waitcnt vmcnt(0)
2035; GFX11-NEXT:    global_store_b32 v11, v1, s[12:13]
2036; GFX11-NEXT:    ; return to shader part epilog
2037;
2038; GFX12-LABEL: sample_c_d_o_2darray_V1_tfe:
2039; GFX12:       ; %bb.0: ; %main_body
2040; GFX12-NEXT:    v_mov_b32_e32 v11, 0
2041; GFX12-NEXT:    v_dual_mov_b32 v9, v1 :: v_dual_mov_b32 v10, v0
2042; GFX12-NEXT:    v_mov_b32_e32 v12, v11
2043; GFX12-NEXT:    v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v12
2044; GFX12-NEXT:    image_sample_c_d_o v[0:1], [v10, v9, v2, v[3:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe
2045; GFX12-NEXT:    s_wait_samplecnt 0x0
2046; GFX12-NEXT:    global_store_b32 v11, v1, s[12:13]
2047; GFX12-NEXT:    ; return to shader part epilog
2048main_body:
2049  %v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
2050  %v.vec = extractvalue {float, i32} %v, 0
2051  %v.err = extractvalue {float, i32} %v, 1
2052  store i32 %v.err, ptr addrspace(1) %out, align 4
2053  ret float %v.vec
2054}
2055
; Test: c.d.o sample on a 2D array returning <2 x float> with dmask i32 6.
; Every run line is expected to emit one image_sample_c_d_o with dmask:0x6 and
; a two-register destination v[0:1]. The nine VGPR operands appear as v[0:8] on
; the pre-gfx12 run lines and as a bracketed register list on the gfx12 run
; line. Pre-gfx10 output carries the da modifier; gfx10 and later print
; dim:SQ_RSRC_IMG_2D_ARRAY instead.
2056define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
2057; VERDE-LABEL: sample_c_d_o_2darray_V2:
2058; VERDE:       ; %bb.0: ; %main_body
2059; VERDE-NEXT:    image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 da
2060; VERDE-NEXT:    s_waitcnt vmcnt(0)
2061; VERDE-NEXT:    ; return to shader part epilog
2062;
2063; GFX6789-LABEL: sample_c_d_o_2darray_V2:
2064; GFX6789:       ; %bb.0: ; %main_body
2065; GFX6789-NEXT:    image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 da
2066; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2067; GFX6789-NEXT:    ; return to shader part epilog
2068;
2069; GFX10PLUS-LABEL: sample_c_d_o_2darray_V2:
2070; GFX10PLUS:       ; %bb.0: ; %main_body
2071; GFX10PLUS-NEXT:    image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
2072; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2073; GFX10PLUS-NEXT:    ; return to shader part epilog
2074;
2075; GFX12-LABEL: sample_c_d_o_2darray_V2:
2076; GFX12:       ; %bb.0: ; %main_body
2077; GFX12-NEXT:    image_sample_c_d_o v[0:1], [v0, v1, v2, v[3:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
2078; GFX12-NEXT:    s_wait_samplecnt 0x0
2079; GFX12-NEXT:    ; return to shader part epilog
2080main_body:
2081  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2082  ret <2 x float> %v
2083}
2084
; Test: c.d.o sample on a 2D array with a {<2 x float>, i32} return, dmask
; i32 6 and the second-to-last i32 operand set to 1; every run line is expected
; to print the tfe modifier and use a three-register destination that is
; written with zero before the sample is issued. The two float lanes and the
; i32 member (bitcast to float) are packed into lanes 0-2 of the <4 x float>
; return value; lane 3 is never written, so the vector is seeded with poison
; (the non-deprecated spelling of a don't-care value).
2085define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) {
2086; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe:
2087; VERDE:       ; %bb.0: ; %main_body
2088; VERDE-NEXT:    v_mov_b32_e32 v9, 0
2089; VERDE-NEXT:    v_mov_b32_e32 v10, v9
2090; VERDE-NEXT:    v_mov_b32_e32 v11, v9
2091; VERDE-NEXT:    image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 tfe da
2092; VERDE-NEXT:    s_waitcnt vmcnt(0)
2093; VERDE-NEXT:    v_mov_b32_e32 v0, v9
2094; VERDE-NEXT:    v_mov_b32_e32 v1, v10
2095; VERDE-NEXT:    v_mov_b32_e32 v2, v11
2096; VERDE-NEXT:    ; return to shader part epilog
2097;
2098; GFX6789-LABEL: sample_c_d_o_2darray_V2_tfe:
2099; GFX6789:       ; %bb.0: ; %main_body
2100; GFX6789-NEXT:    v_mov_b32_e32 v9, 0
2101; GFX6789-NEXT:    v_mov_b32_e32 v10, v9
2102; GFX6789-NEXT:    v_mov_b32_e32 v11, v9
2103; GFX6789-NEXT:    image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 tfe da
2104; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2105; GFX6789-NEXT:    v_mov_b32_e32 v0, v9
2106; GFX6789-NEXT:    v_mov_b32_e32 v1, v10
2107; GFX6789-NEXT:    v_mov_b32_e32 v2, v11
2108; GFX6789-NEXT:    ; return to shader part epilog
2109;
2110; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe:
2111; GFX10:       ; %bb.0: ; %main_body
2112; GFX10-NEXT:    v_mov_b32_e32 v9, 0
2113; GFX10-NEXT:    v_mov_b32_e32 v10, v9
2114; GFX10-NEXT:    v_mov_b32_e32 v11, v9
2115; GFX10-NEXT:    image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
2116; GFX10-NEXT:    s_waitcnt vmcnt(0)
2117; GFX10-NEXT:    v_mov_b32_e32 v0, v9
2118; GFX10-NEXT:    v_mov_b32_e32 v1, v10
2119; GFX10-NEXT:    v_mov_b32_e32 v2, v11
2120; GFX10-NEXT:    ; return to shader part epilog
2121;
2122; GFX11-LABEL: sample_c_d_o_2darray_V2_tfe:
2123; GFX11:       ; %bb.0: ; %main_body
2124; GFX11-NEXT:    v_dual_mov_b32 v11, v0 :: v_dual_mov_b32 v0, 0
2125; GFX11-NEXT:    v_dual_mov_b32 v9, v2 :: v_dual_mov_b32 v10, v1
2126; GFX11-NEXT:    v_mov_b32_e32 v1, v0
2127; GFX11-NEXT:    v_mov_b32_e32 v2, v0
2128; GFX11-NEXT:    image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v[4:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
2129; GFX11-NEXT:    s_waitcnt vmcnt(0)
2130; GFX11-NEXT:    ; return to shader part epilog
2131;
2132; GFX12-LABEL: sample_c_d_o_2darray_V2_tfe:
2133; GFX12:       ; %bb.0: ; %main_body
2134; GFX12-NEXT:    v_dual_mov_b32 v11, v0 :: v_dual_mov_b32 v0, 0
2135; GFX12-NEXT:    v_dual_mov_b32 v9, v2 :: v_dual_mov_b32 v10, v1
2136; GFX12-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, v0
2137; GFX12-NEXT:    image_sample_c_d_o v[0:2], [v11, v10, v9, v[3:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe
2138; GFX12-NEXT:    s_wait_samplecnt 0x0
2139; GFX12-NEXT:    ; return to shader part epilog
2140main_body:
2141  %v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0)
2142  %v.vec = extractvalue {<2 x float>, i32} %v, 0
2143  %v.f1 = extractelement <2 x float> %v.vec, i32 0
2144  %v.f2 = extractelement <2 x float> %v.vec, i32 1
2145  %v.err = extractvalue {<2 x float>, i32} %v, 1
2146  %v.errf = bitcast i32 %v.err to float
2147  %res.0 = insertelement <4 x float> poison, float %v.f1, i32 0
2148  %res.1 = insertelement <4 x float> %res.0, float %v.f2, i32 1
2149  %res.2 = insertelement <4 x float> %res.1, float %v.errf, i32 2
2150  ret <4 x float> %res.2
2151}
2152
; Test: 1D sample with the i1 operand that follows %samp set to 1; every run
; line is expected to print the unorm modifier on image_sample. The checks also
; expect the save-exec / s_wqm / s_and sequence ahead of the sample (64-bit
; exec on the pre-gfx10 run lines, exec_lo on gfx10 and later).
2153define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2154; VERDE-LABEL: sample_1d_unorm:
2155; VERDE:       ; %bb.0: ; %main_body
2156; VERDE-NEXT:    s_mov_b64 s[12:13], exec
2157; VERDE-NEXT:    s_wqm_b64 exec, exec
2158; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
2159; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
2160; VERDE-NEXT:    s_waitcnt vmcnt(0)
2161; VERDE-NEXT:    ; return to shader part epilog
2162;
2163; GFX6789-LABEL: sample_1d_unorm:
2164; GFX6789:       ; %bb.0: ; %main_body
2165; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
2166; GFX6789-NEXT:    s_wqm_b64 exec, exec
2167; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
2168; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm
2169; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2170; GFX6789-NEXT:    ; return to shader part epilog
2171;
2172; GFX10PLUS-LABEL: sample_1d_unorm:
2173; GFX10PLUS:       ; %bb.0: ; %main_body
2174; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
2175; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
2176; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2177; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
2178; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2179; GFX10PLUS-NEXT:    ; return to shader part epilog
2180;
2181; GFX12-LABEL: sample_1d_unorm:
2182; GFX12:       ; %bb.0: ; %main_body
2183; GFX12-NEXT:    s_mov_b32 s12, exec_lo
2184; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
2185; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2186; GFX12-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm
2187; GFX12-NEXT:    s_wait_samplecnt 0x0
2188; GFX12-NEXT:    ; return to shader part epilog
2189main_body:
2190  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0)
2191  ret <4 x float> %v
2192}
2193
; Test: 1D sample with the final i32 operand set to 1. The pre-gfx12 run lines
; are expected to print the glc modifier on image_sample; the gfx12 run line
; is expected to print th:TH_LOAD_NT instead.
2194define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2195; VERDE-LABEL: sample_1d_glc:
2196; VERDE:       ; %bb.0: ; %main_body
2197; VERDE-NEXT:    s_mov_b64 s[12:13], exec
2198; VERDE-NEXT:    s_wqm_b64 exec, exec
2199; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
2200; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
2201; VERDE-NEXT:    s_waitcnt vmcnt(0)
2202; VERDE-NEXT:    ; return to shader part epilog
2203;
2204; GFX6789-LABEL: sample_1d_glc:
2205; GFX6789:       ; %bb.0: ; %main_body
2206; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
2207; GFX6789-NEXT:    s_wqm_b64 exec, exec
2208; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
2209; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc
2210; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2211; GFX6789-NEXT:    ; return to shader part epilog
2212;
2213; GFX10PLUS-LABEL: sample_1d_glc:
2214; GFX10PLUS:       ; %bb.0: ; %main_body
2215; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
2216; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
2217; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2218; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc
2219; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2220; GFX10PLUS-NEXT:    ; return to shader part epilog
2221;
2222; GFX12-LABEL: sample_1d_glc:
2223; GFX12:       ; %bb.0: ; %main_body
2224; GFX12-NEXT:    s_mov_b32 s12, exec_lo
2225; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
2226; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2227; GFX12-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_LOAD_NT
2228; GFX12-NEXT:    s_wait_samplecnt 0x0
2229; GFX12-NEXT:    ; return to shader part epilog
2230main_body:
2231  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1)
2232  ret <4 x float> %v
2233}
2234
; Test: 1D sample with the final i32 operand set to 2. The pre-gfx12 run lines
; are expected to print the slc modifier on image_sample; the gfx12 run line
; is expected to print th:TH_LOAD_HT instead.
2235define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2236; VERDE-LABEL: sample_1d_slc:
2237; VERDE:       ; %bb.0: ; %main_body
2238; VERDE-NEXT:    s_mov_b64 s[12:13], exec
2239; VERDE-NEXT:    s_wqm_b64 exec, exec
2240; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
2241; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
2242; VERDE-NEXT:    s_waitcnt vmcnt(0)
2243; VERDE-NEXT:    ; return to shader part epilog
2244;
2245; GFX6789-LABEL: sample_1d_slc:
2246; GFX6789:       ; %bb.0: ; %main_body
2247; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
2248; GFX6789-NEXT:    s_wqm_b64 exec, exec
2249; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
2250; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc
2251; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2252; GFX6789-NEXT:    ; return to shader part epilog
2253;
2254; GFX10PLUS-LABEL: sample_1d_slc:
2255; GFX10PLUS:       ; %bb.0: ; %main_body
2256; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
2257; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
2258; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2259; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc
2260; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2261; GFX10PLUS-NEXT:    ; return to shader part epilog
2262;
2263; GFX12-LABEL: sample_1d_slc:
2264; GFX12:       ; %bb.0: ; %main_body
2265; GFX12-NEXT:    s_mov_b32 s12, exec_lo
2266; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
2267; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2268; GFX12-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT
2269; GFX12-NEXT:    s_wait_samplecnt 0x0
2270; GFX12-NEXT:    ; return to shader part epilog
2271main_body:
2272  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2)
2273  ret <4 x float> %v
2274}
2275
; Test: 1D sample with the final i32 operand set to 3. The pre-gfx12 run lines
; are expected to print both the glc and slc modifiers on image_sample; the
; gfx12 run line is expected to print th:TH_LOAD_LU instead.
2276define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2277; VERDE-LABEL: sample_1d_glc_slc:
2278; VERDE:       ; %bb.0: ; %main_body
2279; VERDE-NEXT:    s_mov_b64 s[12:13], exec
2280; VERDE-NEXT:    s_wqm_b64 exec, exec
2281; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
2282; VERDE-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
2283; VERDE-NEXT:    s_waitcnt vmcnt(0)
2284; VERDE-NEXT:    ; return to shader part epilog
2285;
2286; GFX6789-LABEL: sample_1d_glc_slc:
2287; GFX6789:       ; %bb.0: ; %main_body
2288; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
2289; GFX6789-NEXT:    s_wqm_b64 exec, exec
2290; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
2291; GFX6789-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc
2292; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2293; GFX6789-NEXT:    ; return to shader part epilog
2294;
2295; GFX10PLUS-LABEL: sample_1d_glc_slc:
2296; GFX10PLUS:       ; %bb.0: ; %main_body
2297; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
2298; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
2299; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2300; GFX10PLUS-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc
2301; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2302; GFX10PLUS-NEXT:    ; return to shader part epilog
2303;
2304; GFX12-LABEL: sample_1d_glc_slc:
2305; GFX12:       ; %bb.0: ; %main_body
2306; GFX12-NEXT:    s_mov_b32 s12, exec_lo
2307; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
2308; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2309; GFX12-NEXT:    image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_LOAD_LU
2310; GFX12-NEXT:    s_wait_samplecnt 0x0
2311; GFX12-NEXT:    ; return to shader part epilog
2312main_body:
2313  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3)
2314  ret <4 x float> %v
2315}
2316
; Test: the IR requests all four channels (dmask i32 15) but only element 0 of
; the result is extracted and returned. Every run line is expected to emit
; image_sample with the mask narrowed to dmask:0x1 and a single destination
; register v0.
2317define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2318; VERDE-LABEL: adjust_writemask_sample_0:
2319; VERDE:       ; %bb.0: ; %main_body
2320; VERDE-NEXT:    s_mov_b64 s[12:13], exec
2321; VERDE-NEXT:    s_wqm_b64 exec, exec
2322; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
2323; VERDE-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
2324; VERDE-NEXT:    s_waitcnt vmcnt(0)
2325; VERDE-NEXT:    ; return to shader part epilog
2326;
2327; GFX6789-LABEL: adjust_writemask_sample_0:
2328; GFX6789:       ; %bb.0: ; %main_body
2329; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
2330; GFX6789-NEXT:    s_wqm_b64 exec, exec
2331; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
2332; GFX6789-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1
2333; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2334; GFX6789-NEXT:    ; return to shader part epilog
2335;
2336; GFX10PLUS-LABEL: adjust_writemask_sample_0:
2337; GFX10PLUS:       ; %bb.0: ; %main_body
2338; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
2339; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
2340; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2341; GFX10PLUS-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
2342; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2343; GFX10PLUS-NEXT:    ; return to shader part epilog
2344;
2345; GFX12-LABEL: adjust_writemask_sample_0:
2346; GFX12:       ; %bb.0: ; %main_body
2347; GFX12-NEXT:    s_mov_b32 s12, exec_lo
2348; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
2349; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2350; GFX12-NEXT:    image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D
2351; GFX12-NEXT:    s_wait_samplecnt 0x0
2352; GFX12-NEXT:    ; return to shader part epilog
2353main_body:
2354  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2355  %elt0 = extractelement <4 x float> %r, i32 0
2356  ret float %elt0
2357}
2358
; Test: the IR requests all four channels (dmask i32 15) but only elements 0
; and 1 of the result are returned. Every run line is expected to emit
; image_sample with the mask narrowed to dmask:0x3 and a two-register
; destination v[0:1]. The shuffle mask only selects from %r, so the unused
; second operand is poison (the non-deprecated spelling of a don't-care value).
2359define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2360; VERDE-LABEL: adjust_writemask_sample_01:
2361; VERDE:       ; %bb.0: ; %main_body
2362; VERDE-NEXT:    s_mov_b64 s[12:13], exec
2363; VERDE-NEXT:    s_wqm_b64 exec, exec
2364; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
2365; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
2366; VERDE-NEXT:    s_waitcnt vmcnt(0)
2367; VERDE-NEXT:    ; return to shader part epilog
2368;
2369; GFX6789-LABEL: adjust_writemask_sample_01:
2370; GFX6789:       ; %bb.0: ; %main_body
2371; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
2372; GFX6789-NEXT:    s_wqm_b64 exec, exec
2373; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
2374; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3
2375; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2376; GFX6789-NEXT:    ; return to shader part epilog
2377;
2378; GFX10PLUS-LABEL: adjust_writemask_sample_01:
2379; GFX10PLUS:       ; %bb.0: ; %main_body
2380; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
2381; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
2382; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2383; GFX10PLUS-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D
2384; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2385; GFX10PLUS-NEXT:    ; return to shader part epilog
2386;
2387; GFX12-LABEL: adjust_writemask_sample_01:
2388; GFX12:       ; %bb.0: ; %main_body
2389; GFX12-NEXT:    s_mov_b32 s12, exec_lo
2390; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
2391; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2392; GFX12-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D
2393; GFX12-NEXT:    s_wait_samplecnt 0x0
2394; GFX12-NEXT:    ; return to shader part epilog
2395main_body:
2396  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2397  %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 0, i32 1>
2398  ret <2 x float> %out
2399}
2400
; Test: the IR requests all four channels (dmask i32 15) but only elements 0,
; 1 and 2 of the result are returned. Every run line is expected to emit
; image_sample with the mask narrowed to dmask:0x7 and a three-register
; destination v[0:2]. The shuffle mask only selects from %r, so the unused
; second operand is poison (the non-deprecated spelling of a don't-care value).
2401define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2402; VERDE-LABEL: adjust_writemask_sample_012:
2403; VERDE:       ; %bb.0: ; %main_body
2404; VERDE-NEXT:    s_mov_b64 s[12:13], exec
2405; VERDE-NEXT:    s_wqm_b64 exec, exec
2406; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
2407; VERDE-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
2408; VERDE-NEXT:    s_waitcnt vmcnt(0)
2409; VERDE-NEXT:    ; return to shader part epilog
2410;
2411; GFX6789-LABEL: adjust_writemask_sample_012:
2412; GFX6789:       ; %bb.0: ; %main_body
2413; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
2414; GFX6789-NEXT:    s_wqm_b64 exec, exec
2415; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
2416; GFX6789-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7
2417; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2418; GFX6789-NEXT:    ; return to shader part epilog
2419;
2420; GFX10PLUS-LABEL: adjust_writemask_sample_012:
2421; GFX10PLUS:       ; %bb.0: ; %main_body
2422; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
2423; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
2424; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2425; GFX10PLUS-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D
2426; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2427; GFX10PLUS-NEXT:    ; return to shader part epilog
2428;
2429; GFX12-LABEL: adjust_writemask_sample_012:
2430; GFX12:       ; %bb.0: ; %main_body
2431; GFX12-NEXT:    s_mov_b32 s12, exec_lo
2432; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
2433; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2434; GFX12-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D
2435; GFX12-NEXT:    s_wait_samplecnt 0x0
2436; GFX12-NEXT:    ; return to shader part epilog
2437main_body:
2438  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2439  %out = shufflevector <4 x float> %r, <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
2440  ret <3 x float> %out
2441}
2442
; Test: the IR requests all four channels (dmask i32 15) but only elements 1
; and 2 of the result are returned. Every run line is expected to emit
; image_sample with the mask narrowed to dmask:0x6 and a two-register
; destination v[0:1]. The shuffle mask only selects from %r, so the unused
; second operand is poison (the non-deprecated spelling of a don't-care value).
2443define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2444; VERDE-LABEL: adjust_writemask_sample_12:
2445; VERDE:       ; %bb.0: ; %main_body
2446; VERDE-NEXT:    s_mov_b64 s[12:13], exec
2447; VERDE-NEXT:    s_wqm_b64 exec, exec
2448; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
2449; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
2450; VERDE-NEXT:    s_waitcnt vmcnt(0)
2451; VERDE-NEXT:    ; return to shader part epilog
2452;
2453; GFX6789-LABEL: adjust_writemask_sample_12:
2454; GFX6789:       ; %bb.0: ; %main_body
2455; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
2456; GFX6789-NEXT:    s_wqm_b64 exec, exec
2457; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
2458; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
2459; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2460; GFX6789-NEXT:    ; return to shader part epilog
2461;
2462; GFX10PLUS-LABEL: adjust_writemask_sample_12:
2463; GFX10PLUS:       ; %bb.0: ; %main_body
2464; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
2465; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
2466; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2467; GFX10PLUS-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
2468; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2469; GFX10PLUS-NEXT:    ; return to shader part epilog
2470;
2471; GFX12-LABEL: adjust_writemask_sample_12:
2472; GFX12:       ; %bb.0: ; %main_body
2473; GFX12-NEXT:    s_mov_b32 s12, exec_lo
2474; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
2475; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2476; GFX12-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
2477; GFX12-NEXT:    s_wait_samplecnt 0x0
2478; GFX12-NEXT:    ; return to shader part epilog
2479main_body:
2480  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2481  %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 1, i32 2>
2482  ret <2 x float> %out
2483}
2484
; Test: the IR requests all four channels (dmask i32 15) but only elements 0
; and 3 of the result are returned. Every run line is expected to emit
; image_sample with the mask narrowed to dmask:0x9 and a two-register
; destination v[0:1]. The shuffle mask only selects from %r, so the unused
; second operand is poison (the non-deprecated spelling of a don't-care value).
2485define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2486; VERDE-LABEL: adjust_writemask_sample_03:
2487; VERDE:       ; %bb.0: ; %main_body
2488; VERDE-NEXT:    s_mov_b64 s[12:13], exec
2489; VERDE-NEXT:    s_wqm_b64 exec, exec
2490; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
2491; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
2492; VERDE-NEXT:    s_waitcnt vmcnt(0)
2493; VERDE-NEXT:    ; return to shader part epilog
2494;
2495; GFX6789-LABEL: adjust_writemask_sample_03:
2496; GFX6789:       ; %bb.0: ; %main_body
2497; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
2498; GFX6789-NEXT:    s_wqm_b64 exec, exec
2499; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
2500; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9
2501; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2502; GFX6789-NEXT:    ; return to shader part epilog
2503;
2504; GFX10PLUS-LABEL: adjust_writemask_sample_03:
2505; GFX10PLUS:       ; %bb.0: ; %main_body
2506; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
2507; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
2508; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2509; GFX10PLUS-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D
2510; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2511; GFX10PLUS-NEXT:    ; return to shader part epilog
2512;
2513; GFX12-LABEL: adjust_writemask_sample_03:
2514; GFX12:       ; %bb.0: ; %main_body
2515; GFX12-NEXT:    s_mov_b32 s12, exec_lo
2516; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
2517; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2518; GFX12-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D
2519; GFX12-NEXT:    s_wait_samplecnt 0x0
2520; GFX12-NEXT:    ; return to shader part epilog
2521main_body:
2522  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2523  %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 0, i32 3>
2524  ret <2 x float> %out
2525}
2526
; Test: the IR requests all four channels (dmask i32 15) but only elements 1
; and 3 of the result are returned. Every run line is expected to emit
; image_sample with the mask narrowed to dmask:0xa and a two-register
; destination v[0:1]. The shuffle mask only selects from %r, so the unused
; second operand is poison (the non-deprecated spelling of a don't-care value).
2527define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2528; VERDE-LABEL: adjust_writemask_sample_13:
2529; VERDE:       ; %bb.0: ; %main_body
2530; VERDE-NEXT:    s_mov_b64 s[12:13], exec
2531; VERDE-NEXT:    s_wqm_b64 exec, exec
2532; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
2533; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
2534; VERDE-NEXT:    s_waitcnt vmcnt(0)
2535; VERDE-NEXT:    ; return to shader part epilog
2536;
2537; GFX6789-LABEL: adjust_writemask_sample_13:
2538; GFX6789:       ; %bb.0: ; %main_body
2539; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
2540; GFX6789-NEXT:    s_wqm_b64 exec, exec
2541; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
2542; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
2543; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2544; GFX6789-NEXT:    ; return to shader part epilog
2545;
2546; GFX10PLUS-LABEL: adjust_writemask_sample_13:
2547; GFX10PLUS:       ; %bb.0: ; %main_body
2548; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
2549; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
2550; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2551; GFX10PLUS-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
2552; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2553; GFX10PLUS-NEXT:    ; return to shader part epilog
2554;
2555; GFX12-LABEL: adjust_writemask_sample_13:
2556; GFX12:       ; %bb.0: ; %main_body
2557; GFX12-NEXT:    s_mov_b32 s12, exec_lo
2558; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
2559; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2560; GFX12-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
2561; GFX12-NEXT:    s_wait_samplecnt 0x0
2562; GFX12-NEXT:    ; return to shader part epilog
2563main_body:
2564  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2565  %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 1, i32 3>
2566  ret <2 x float> %out
2567}
2568
; Test: the IR requests all four channels (dmask i32 15) but only elements 1,
; 2 and 3 of the result are returned. Every run line is expected to emit
; image_sample with the mask narrowed to dmask:0xe and a three-register
; destination v[0:2]. The shuffle mask only selects from %r, so the unused
; second operand is poison (the non-deprecated spelling of a don't-care value).
2569define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2570; VERDE-LABEL: adjust_writemask_sample_123:
2571; VERDE:       ; %bb.0: ; %main_body
2572; VERDE-NEXT:    s_mov_b64 s[12:13], exec
2573; VERDE-NEXT:    s_wqm_b64 exec, exec
2574; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
2575; VERDE-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
2576; VERDE-NEXT:    s_waitcnt vmcnt(0)
2577; VERDE-NEXT:    ; return to shader part epilog
2578;
2579; GFX6789-LABEL: adjust_writemask_sample_123:
2580; GFX6789:       ; %bb.0: ; %main_body
2581; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
2582; GFX6789-NEXT:    s_wqm_b64 exec, exec
2583; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
2584; GFX6789-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe
2585; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2586; GFX6789-NEXT:    ; return to shader part epilog
2587;
2588; GFX10PLUS-LABEL: adjust_writemask_sample_123:
2589; GFX10PLUS:       ; %bb.0: ; %main_body
2590; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
2591; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
2592; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2593; GFX10PLUS-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D
2594; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2595; GFX10PLUS-NEXT:    ; return to shader part epilog
2596;
2597; GFX12-LABEL: adjust_writemask_sample_123:
2598; GFX12:       ; %bb.0: ; %main_body
2599; GFX12-NEXT:    s_mov_b32 s12, exec_lo
2600; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
2601; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2602; GFX12-NEXT:    image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D
2603; GFX12-NEXT:    s_wait_samplecnt 0x0
2604; GFX12-NEXT:    ; return to shader part epilog
2605main_body:
2606  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2607  %out = shufflevector <4 x float> %r, <4 x float> poison, <3 x i32> <i32 1, i32 2, i32 3>
2608  ret <3 x float> %out
2609}
2610
; Calls the 1D sample intrinsic with a first operand (dmask) of 0 and returns
; the whole result. For every checked target the expected output contains no
; image_sample instruction at all, only the shader-part epilog marker.
2611define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2612; VERDE-LABEL: adjust_writemask_sample_none_enabled:
2613; VERDE:       ; %bb.0: ; %main_body
2614; VERDE-NEXT:    ; return to shader part epilog
2615;
2616; GFX6789-LABEL: adjust_writemask_sample_none_enabled:
2617; GFX6789:       ; %bb.0: ; %main_body
2618; GFX6789-NEXT:    ; return to shader part epilog
2619;
2620; GFX10PLUS-LABEL: adjust_writemask_sample_none_enabled:
2621; GFX10PLUS:       ; %bb.0: ; %main_body
2622; GFX10PLUS-NEXT:    ; return to shader part epilog
2623;
2624; GFX12-LABEL: adjust_writemask_sample_none_enabled:
2625; GFX12:       ; %bb.0: ; %main_body
2626; GFX12-NEXT:    ; return to shader part epilog
2627main_body:
2628  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2629  ret <4 x float> %r
2630}
2631
; Samples a 1D image with first operand 14 (binary 1110: channels 1, 2 and 3)
; and keeps only result elements 0 and 1. The checks expect those two elements
; to map back to channels 1 and 2, i.e. dmask:0x6 with a two-register
; destination.
; The second shuffle operand is never selected by the mask, so poison is used
; as the placeholder value.
2632define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2633; VERDE-LABEL: adjust_writemask_sample_123_to_12:
2634; VERDE:       ; %bb.0: ; %main_body
2635; VERDE-NEXT:    s_mov_b64 s[12:13], exec
2636; VERDE-NEXT:    s_wqm_b64 exec, exec
2637; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
2638; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
2639; VERDE-NEXT:    s_waitcnt vmcnt(0)
2640; VERDE-NEXT:    ; return to shader part epilog
2641;
2642; GFX6789-LABEL: adjust_writemask_sample_123_to_12:
2643; GFX6789:       ; %bb.0: ; %main_body
2644; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
2645; GFX6789-NEXT:    s_wqm_b64 exec, exec
2646; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
2647; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6
2648; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2649; GFX6789-NEXT:    ; return to shader part epilog
2650;
2651; GFX10PLUS-LABEL: adjust_writemask_sample_123_to_12:
2652; GFX10PLUS:       ; %bb.0: ; %main_body
2653; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
2654; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
2655; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2656; GFX10PLUS-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
2657; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2658; GFX10PLUS-NEXT:    ; return to shader part epilog
2659;
2660; GFX12-LABEL: adjust_writemask_sample_123_to_12:
2661; GFX12:       ; %bb.0: ; %main_body
2662; GFX12-NEXT:    s_mov_b32 s12, exec_lo
2663; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
2664; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2665; GFX12-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D
2666; GFX12-NEXT:    s_wait_samplecnt 0x0
2667; GFX12-NEXT:    ; return to shader part epilog
2668main_body:
2669  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2670  %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 0, i32 1>
2671  ret <2 x float> %out
2672}
2673
; Samples a 1D image with first operand 11 (binary 1011: channels 0, 1 and 3)
; and keeps only result elements 1 and 2. The checks expect those two elements
; to map back to channels 1 and 3, i.e. dmask:0xa with a two-register
; destination.
; The second shuffle operand is never selected by the mask, so poison is used
; as the placeholder value.
2674define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
2675; VERDE-LABEL: adjust_writemask_sample_013_to_13:
2676; VERDE:       ; %bb.0: ; %main_body
2677; VERDE-NEXT:    s_mov_b64 s[12:13], exec
2678; VERDE-NEXT:    s_wqm_b64 exec, exec
2679; VERDE-NEXT:    s_and_b64 exec, exec, s[12:13]
2680; VERDE-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
2681; VERDE-NEXT:    s_waitcnt vmcnt(0)
2682; VERDE-NEXT:    ; return to shader part epilog
2683;
2684; GFX6789-LABEL: adjust_writemask_sample_013_to_13:
2685; GFX6789:       ; %bb.0: ; %main_body
2686; GFX6789-NEXT:    s_mov_b64 s[12:13], exec
2687; GFX6789-NEXT:    s_wqm_b64 exec, exec
2688; GFX6789-NEXT:    s_and_b64 exec, exec, s[12:13]
2689; GFX6789-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa
2690; GFX6789-NEXT:    s_waitcnt vmcnt(0)
2691; GFX6789-NEXT:    ; return to shader part epilog
2692;
2693; GFX10PLUS-LABEL: adjust_writemask_sample_013_to_13:
2694; GFX10PLUS:       ; %bb.0: ; %main_body
2695; GFX10PLUS-NEXT:    s_mov_b32 s12, exec_lo
2696; GFX10PLUS-NEXT:    s_wqm_b32 exec_lo, exec_lo
2697; GFX10PLUS-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2698; GFX10PLUS-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
2699; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
2700; GFX10PLUS-NEXT:    ; return to shader part epilog
2701;
2702; GFX12-LABEL: adjust_writemask_sample_013_to_13:
2703; GFX12:       ; %bb.0: ; %main_body
2704; GFX12-NEXT:    s_mov_b32 s12, exec_lo
2705; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
2706; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s12
2707; GFX12-NEXT:    image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D
2708; GFX12-NEXT:    s_wait_samplecnt 0x0
2709; GFX12-NEXT:    ; return to shader part epilog
2710main_body:
2711  %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
2712  %out = shufflevector <4 x float> %r, <4 x float> poison, <2 x i32> <i32 1, i32 2>
2713  ret <2 x float> %out
2714}
2715
; Declarations of the llvm.amdgcn.image.sample.* intrinsics used by this test.
; Every declaration starts with an i32 operand and ends with
; <8 x i32>, <4 x i32>, i1, i32, i32; the float operands in between vary with
; the sample variant and image dimension encoded in the name.

; Plain sample, one declaration per image dimension. The second declaration
; returns {<4 x float>, i32} instead of a bare vector.
; NOTE(review): the extra i32 is presumably the texture-fail status word - confirm.
2716declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2717declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2718declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2719declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2720declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2721declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2722declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2723
; .c, .cl and .c.cl variants; each suffix adds one float operand relative to
; the plain sample of the same dimension.
2724declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2725declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2726declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2727declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2728declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2729declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2730
; .b variants, alone and combined with .c / .cl. These names carry two
; trailing type suffixes (.f32.f32) instead of one.
2731declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2732declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2733declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2734declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2735declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2736declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2737declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2738declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2739
; .d variants, alone and combined with .c / .cl. The 1d forms take two more
; float operands than the matching non-.d form, the 2d forms four more.
2740declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2741declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2742declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2743declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2744declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2745declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2746declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2747declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2748
; .l variants, alone and combined with .c.
2749declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2750declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2751declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2752declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2753
; .lz variants, alone and combined with .c; these take the same number of
; float operands as the corresponding form without .lz.
2754declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2755declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2756declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2757declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2758
; .c.d.o on a 2darray image, declared with scalar and <2 x float> results,
; each also in a form that returns an additional i32 in a struct. These take
; a second leading i32 operand before the floats.
2759declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2760declare {float, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2761declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2762declare {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2763
; Attribute groups. #1 (nounwind readonly) is the group attached to every
; image.sample intrinsic declaration above.
2764attributes #0 = { nounwind }
2765attributes #1 = { nounwind readonly }
2766attributes #2 = { nounwind readnone }
2767