xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.gather4.o.dim.ll (revision 6b5067a81a698142c4f5bd9405cede9191901fe2)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
4; RUN: not --crash llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s 2>&1 | FileCheck -check-prefix=GFX11-ERR %s
5
6; GFX11-ERR: LLVM ERROR: cannot select: {{.*}} = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.gather4
7
8define amdgpu_ps <4 x float> @gather4_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
9; GFX6-LABEL: gather4_o_2d:
10; GFX6:       ; %bb.0: ; %main_body
11; GFX6-NEXT:    s_mov_b64 s[14:15], exec
12; GFX6-NEXT:    s_mov_b32 s0, s2
13; GFX6-NEXT:    s_mov_b32 s1, s3
14; GFX6-NEXT:    s_mov_b32 s2, s4
15; GFX6-NEXT:    s_mov_b32 s3, s5
16; GFX6-NEXT:    s_mov_b32 s4, s6
17; GFX6-NEXT:    s_mov_b32 s5, s7
18; GFX6-NEXT:    s_mov_b32 s6, s8
19; GFX6-NEXT:    s_mov_b32 s7, s9
20; GFX6-NEXT:    s_mov_b32 s8, s10
21; GFX6-NEXT:    s_mov_b32 s9, s11
22; GFX6-NEXT:    s_mov_b32 s10, s12
23; GFX6-NEXT:    s_mov_b32 s11, s13
24; GFX6-NEXT:    s_wqm_b64 exec, exec
25; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
26; GFX6-NEXT:    image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
27; GFX6-NEXT:    s_waitcnt vmcnt(0)
28; GFX6-NEXT:    ; return to shader part epilog
29;
30; GFX10-LABEL: gather4_o_2d:
31; GFX10:       ; %bb.0: ; %main_body
32; GFX10-NEXT:    s_mov_b32 s1, exec_lo
33; GFX10-NEXT:    s_mov_b32 s0, s2
34; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
35; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
36; GFX10-NEXT:    s_mov_b32 s1, s3
37; GFX10-NEXT:    s_mov_b32 s2, s4
38; GFX10-NEXT:    s_mov_b32 s3, s5
39; GFX10-NEXT:    s_mov_b32 s4, s6
40; GFX10-NEXT:    s_mov_b32 s5, s7
41; GFX10-NEXT:    s_mov_b32 s6, s8
42; GFX10-NEXT:    s_mov_b32 s7, s9
43; GFX10-NEXT:    s_mov_b32 s8, s10
44; GFX10-NEXT:    s_mov_b32 s9, s11
45; GFX10-NEXT:    s_mov_b32 s10, s12
46; GFX10-NEXT:    s_mov_b32 s11, s13
47; GFX10-NEXT:    image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
48; GFX10-NEXT:    s_waitcnt vmcnt(0)
49; GFX10-NEXT:    ; return to shader part epilog
50main_body:
51  %v = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
52  ret <4 x float> %v
53}
54
55define amdgpu_ps <4 x float> @gather4_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
56; GFX6-LABEL: gather4_c_o_2d:
57; GFX6:       ; %bb.0: ; %main_body
58; GFX6-NEXT:    s_mov_b64 s[14:15], exec
59; GFX6-NEXT:    s_mov_b32 s0, s2
60; GFX6-NEXT:    s_mov_b32 s1, s3
61; GFX6-NEXT:    s_mov_b32 s2, s4
62; GFX6-NEXT:    s_mov_b32 s3, s5
63; GFX6-NEXT:    s_mov_b32 s4, s6
64; GFX6-NEXT:    s_mov_b32 s5, s7
65; GFX6-NEXT:    s_mov_b32 s6, s8
66; GFX6-NEXT:    s_mov_b32 s7, s9
67; GFX6-NEXT:    s_mov_b32 s8, s10
68; GFX6-NEXT:    s_mov_b32 s9, s11
69; GFX6-NEXT:    s_mov_b32 s10, s12
70; GFX6-NEXT:    s_mov_b32 s11, s13
71; GFX6-NEXT:    s_wqm_b64 exec, exec
72; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
73; GFX6-NEXT:    image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
74; GFX6-NEXT:    s_waitcnt vmcnt(0)
75; GFX6-NEXT:    ; return to shader part epilog
76;
77; GFX10-LABEL: gather4_c_o_2d:
78; GFX10:       ; %bb.0: ; %main_body
79; GFX10-NEXT:    s_mov_b32 s1, exec_lo
80; GFX10-NEXT:    s_mov_b32 s0, s2
81; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
82; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
83; GFX10-NEXT:    s_mov_b32 s1, s3
84; GFX10-NEXT:    s_mov_b32 s2, s4
85; GFX10-NEXT:    s_mov_b32 s3, s5
86; GFX10-NEXT:    s_mov_b32 s4, s6
87; GFX10-NEXT:    s_mov_b32 s5, s7
88; GFX10-NEXT:    s_mov_b32 s6, s8
89; GFX10-NEXT:    s_mov_b32 s7, s9
90; GFX10-NEXT:    s_mov_b32 s8, s10
91; GFX10-NEXT:    s_mov_b32 s9, s11
92; GFX10-NEXT:    s_mov_b32 s10, s12
93; GFX10-NEXT:    s_mov_b32 s11, s13
94; GFX10-NEXT:    image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
95; GFX10-NEXT:    s_waitcnt vmcnt(0)
96; GFX10-NEXT:    ; return to shader part epilog
97main_body:
98  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
99  ret <4 x float> %v
100}
101
102define amdgpu_ps <4 x float> @gather4_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) {
103; GFX6-LABEL: gather4_cl_o_2d:
104; GFX6:       ; %bb.0: ; %main_body
105; GFX6-NEXT:    s_mov_b64 s[14:15], exec
106; GFX6-NEXT:    s_mov_b32 s0, s2
107; GFX6-NEXT:    s_mov_b32 s1, s3
108; GFX6-NEXT:    s_mov_b32 s2, s4
109; GFX6-NEXT:    s_mov_b32 s3, s5
110; GFX6-NEXT:    s_mov_b32 s4, s6
111; GFX6-NEXT:    s_mov_b32 s5, s7
112; GFX6-NEXT:    s_mov_b32 s6, s8
113; GFX6-NEXT:    s_mov_b32 s7, s9
114; GFX6-NEXT:    s_mov_b32 s8, s10
115; GFX6-NEXT:    s_mov_b32 s9, s11
116; GFX6-NEXT:    s_mov_b32 s10, s12
117; GFX6-NEXT:    s_mov_b32 s11, s13
118; GFX6-NEXT:    s_wqm_b64 exec, exec
119; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
120; GFX6-NEXT:    image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
121; GFX6-NEXT:    s_waitcnt vmcnt(0)
122; GFX6-NEXT:    ; return to shader part epilog
123;
124; GFX10-LABEL: gather4_cl_o_2d:
125; GFX10:       ; %bb.0: ; %main_body
126; GFX10-NEXT:    s_mov_b32 s1, exec_lo
127; GFX10-NEXT:    s_mov_b32 s0, s2
128; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
129; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
130; GFX10-NEXT:    s_mov_b32 s1, s3
131; GFX10-NEXT:    s_mov_b32 s2, s4
132; GFX10-NEXT:    s_mov_b32 s3, s5
133; GFX10-NEXT:    s_mov_b32 s4, s6
134; GFX10-NEXT:    s_mov_b32 s5, s7
135; GFX10-NEXT:    s_mov_b32 s6, s8
136; GFX10-NEXT:    s_mov_b32 s7, s9
137; GFX10-NEXT:    s_mov_b32 s8, s10
138; GFX10-NEXT:    s_mov_b32 s9, s11
139; GFX10-NEXT:    s_mov_b32 s10, s12
140; GFX10-NEXT:    s_mov_b32 s11, s13
141; GFX10-NEXT:    image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
142; GFX10-NEXT:    s_waitcnt vmcnt(0)
143; GFX10-NEXT:    ; return to shader part epilog
144main_body:
145  %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
146  ret <4 x float> %v
147}
148
149define amdgpu_ps <4 x float> @gather4_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) {
150; GFX6-LABEL: gather4_c_cl_o_2d:
151; GFX6:       ; %bb.0: ; %main_body
152; GFX6-NEXT:    s_mov_b64 s[14:15], exec
153; GFX6-NEXT:    s_mov_b32 s0, s2
154; GFX6-NEXT:    s_mov_b32 s1, s3
155; GFX6-NEXT:    s_mov_b32 s2, s4
156; GFX6-NEXT:    s_mov_b32 s3, s5
157; GFX6-NEXT:    s_mov_b32 s4, s6
158; GFX6-NEXT:    s_mov_b32 s5, s7
159; GFX6-NEXT:    s_mov_b32 s6, s8
160; GFX6-NEXT:    s_mov_b32 s7, s9
161; GFX6-NEXT:    s_mov_b32 s8, s10
162; GFX6-NEXT:    s_mov_b32 s9, s11
163; GFX6-NEXT:    s_mov_b32 s10, s12
164; GFX6-NEXT:    s_mov_b32 s11, s13
165; GFX6-NEXT:    s_wqm_b64 exec, exec
166; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
167; GFX6-NEXT:    image_gather4_c_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
168; GFX6-NEXT:    s_waitcnt vmcnt(0)
169; GFX6-NEXT:    ; return to shader part epilog
170;
171; GFX10-LABEL: gather4_c_cl_o_2d:
172; GFX10:       ; %bb.0: ; %main_body
173; GFX10-NEXT:    s_mov_b32 s1, exec_lo
174; GFX10-NEXT:    s_mov_b32 s0, s2
175; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
176; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
177; GFX10-NEXT:    s_mov_b32 s1, s3
178; GFX10-NEXT:    s_mov_b32 s2, s4
179; GFX10-NEXT:    s_mov_b32 s3, s5
180; GFX10-NEXT:    s_mov_b32 s4, s6
181; GFX10-NEXT:    s_mov_b32 s5, s7
182; GFX10-NEXT:    s_mov_b32 s6, s8
183; GFX10-NEXT:    s_mov_b32 s7, s9
184; GFX10-NEXT:    s_mov_b32 s8, s10
185; GFX10-NEXT:    s_mov_b32 s9, s11
186; GFX10-NEXT:    s_mov_b32 s10, s12
187; GFX10-NEXT:    s_mov_b32 s11, s13
188; GFX10-NEXT:    image_gather4_c_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
189; GFX10-NEXT:    s_waitcnt vmcnt(0)
190; GFX10-NEXT:    ; return to shader part epilog
191main_body:
192  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
193  ret <4 x float> %v
194}
195
196define amdgpu_ps <4 x float> @gather4_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) {
197; GFX6-LABEL: gather4_b_o_2d:
198; GFX6:       ; %bb.0: ; %main_body
199; GFX6-NEXT:    s_mov_b64 s[14:15], exec
200; GFX6-NEXT:    s_mov_b32 s0, s2
201; GFX6-NEXT:    s_mov_b32 s1, s3
202; GFX6-NEXT:    s_mov_b32 s2, s4
203; GFX6-NEXT:    s_mov_b32 s3, s5
204; GFX6-NEXT:    s_mov_b32 s4, s6
205; GFX6-NEXT:    s_mov_b32 s5, s7
206; GFX6-NEXT:    s_mov_b32 s6, s8
207; GFX6-NEXT:    s_mov_b32 s7, s9
208; GFX6-NEXT:    s_mov_b32 s8, s10
209; GFX6-NEXT:    s_mov_b32 s9, s11
210; GFX6-NEXT:    s_mov_b32 s10, s12
211; GFX6-NEXT:    s_mov_b32 s11, s13
212; GFX6-NEXT:    s_wqm_b64 exec, exec
213; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
214; GFX6-NEXT:    image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
215; GFX6-NEXT:    s_waitcnt vmcnt(0)
216; GFX6-NEXT:    ; return to shader part epilog
217;
218; GFX10-LABEL: gather4_b_o_2d:
219; GFX10:       ; %bb.0: ; %main_body
220; GFX10-NEXT:    s_mov_b32 s1, exec_lo
221; GFX10-NEXT:    s_mov_b32 s0, s2
222; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
223; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
224; GFX10-NEXT:    s_mov_b32 s1, s3
225; GFX10-NEXT:    s_mov_b32 s2, s4
226; GFX10-NEXT:    s_mov_b32 s3, s5
227; GFX10-NEXT:    s_mov_b32 s4, s6
228; GFX10-NEXT:    s_mov_b32 s5, s7
229; GFX10-NEXT:    s_mov_b32 s6, s8
230; GFX10-NEXT:    s_mov_b32 s7, s9
231; GFX10-NEXT:    s_mov_b32 s8, s10
232; GFX10-NEXT:    s_mov_b32 s9, s11
233; GFX10-NEXT:    s_mov_b32 s10, s12
234; GFX10-NEXT:    s_mov_b32 s11, s13
235; GFX10-NEXT:    image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
236; GFX10-NEXT:    s_waitcnt vmcnt(0)
237; GFX10-NEXT:    ; return to shader part epilog
238main_body:
239  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
240  ret <4 x float> %v
241}
242
243define amdgpu_ps <4 x float> @gather4_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) {
244; GFX6-LABEL: gather4_c_b_o_2d:
245; GFX6:       ; %bb.0: ; %main_body
246; GFX6-NEXT:    s_mov_b64 s[14:15], exec
247; GFX6-NEXT:    s_mov_b32 s0, s2
248; GFX6-NEXT:    s_mov_b32 s1, s3
249; GFX6-NEXT:    s_mov_b32 s2, s4
250; GFX6-NEXT:    s_mov_b32 s3, s5
251; GFX6-NEXT:    s_mov_b32 s4, s6
252; GFX6-NEXT:    s_mov_b32 s5, s7
253; GFX6-NEXT:    s_mov_b32 s6, s8
254; GFX6-NEXT:    s_mov_b32 s7, s9
255; GFX6-NEXT:    s_mov_b32 s8, s10
256; GFX6-NEXT:    s_mov_b32 s9, s11
257; GFX6-NEXT:    s_mov_b32 s10, s12
258; GFX6-NEXT:    s_mov_b32 s11, s13
259; GFX6-NEXT:    s_wqm_b64 exec, exec
260; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
261; GFX6-NEXT:    image_gather4_c_b_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
262; GFX6-NEXT:    s_waitcnt vmcnt(0)
263; GFX6-NEXT:    ; return to shader part epilog
264;
265; GFX10-LABEL: gather4_c_b_o_2d:
266; GFX10:       ; %bb.0: ; %main_body
267; GFX10-NEXT:    s_mov_b32 s1, exec_lo
268; GFX10-NEXT:    s_mov_b32 s0, s2
269; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
270; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
271; GFX10-NEXT:    s_mov_b32 s1, s3
272; GFX10-NEXT:    s_mov_b32 s2, s4
273; GFX10-NEXT:    s_mov_b32 s3, s5
274; GFX10-NEXT:    s_mov_b32 s4, s6
275; GFX10-NEXT:    s_mov_b32 s5, s7
276; GFX10-NEXT:    s_mov_b32 s6, s8
277; GFX10-NEXT:    s_mov_b32 s7, s9
278; GFX10-NEXT:    s_mov_b32 s8, s10
279; GFX10-NEXT:    s_mov_b32 s9, s11
280; GFX10-NEXT:    s_mov_b32 s10, s12
281; GFX10-NEXT:    s_mov_b32 s11, s13
282; GFX10-NEXT:    image_gather4_c_b_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
283; GFX10-NEXT:    s_waitcnt vmcnt(0)
284; GFX10-NEXT:    ; return to shader part epilog
285main_body:
286  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
287  ret <4 x float> %v
288}
289
290define amdgpu_ps <4 x float> @gather4_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) {
291; GFX6-LABEL: gather4_b_cl_o_2d:
292; GFX6:       ; %bb.0: ; %main_body
293; GFX6-NEXT:    s_mov_b32 s0, s2
294; GFX6-NEXT:    s_mov_b32 s1, s3
295; GFX6-NEXT:    s_mov_b32 s2, s4
296; GFX6-NEXT:    s_mov_b32 s3, s5
297; GFX6-NEXT:    s_mov_b32 s4, s6
298; GFX6-NEXT:    s_mov_b32 s5, s7
299; GFX6-NEXT:    s_mov_b32 s6, s8
300; GFX6-NEXT:    s_mov_b32 s7, s9
301; GFX6-NEXT:    s_mov_b32 s8, s10
302; GFX6-NEXT:    s_mov_b32 s9, s11
303; GFX6-NEXT:    s_mov_b32 s10, s12
304; GFX6-NEXT:    s_mov_b32 s11, s13
305; GFX6-NEXT:    image_gather4_b_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
306; GFX6-NEXT:    s_waitcnt vmcnt(0)
307; GFX6-NEXT:    ; return to shader part epilog
308;
309; GFX10-LABEL: gather4_b_cl_o_2d:
310; GFX10:       ; %bb.0: ; %main_body
311; GFX10-NEXT:    s_mov_b32 s0, s2
312; GFX10-NEXT:    s_mov_b32 s1, s3
313; GFX10-NEXT:    s_mov_b32 s2, s4
314; GFX10-NEXT:    s_mov_b32 s3, s5
315; GFX10-NEXT:    s_mov_b32 s4, s6
316; GFX10-NEXT:    s_mov_b32 s5, s7
317; GFX10-NEXT:    s_mov_b32 s6, s8
318; GFX10-NEXT:    s_mov_b32 s7, s9
319; GFX10-NEXT:    s_mov_b32 s8, s10
320; GFX10-NEXT:    s_mov_b32 s9, s11
321; GFX10-NEXT:    s_mov_b32 s10, s12
322; GFX10-NEXT:    s_mov_b32 s11, s13
323; GFX10-NEXT:    image_gather4_b_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
324; GFX10-NEXT:    s_waitcnt vmcnt(0)
325; GFX10-NEXT:    ; return to shader part epilog
326main_body:
327  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
328  ret <4 x float> %v
329}
330
331define amdgpu_ps <4 x float> @gather4_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
332; GFX6-LABEL: gather4_c_b_cl_o_2d:
333; GFX6:       ; %bb.0: ; %main_body
334; GFX6-NEXT:    s_mov_b64 s[14:15], exec
335; GFX6-NEXT:    s_mov_b32 s0, s2
336; GFX6-NEXT:    s_mov_b32 s1, s3
337; GFX6-NEXT:    s_mov_b32 s2, s4
338; GFX6-NEXT:    s_mov_b32 s3, s5
339; GFX6-NEXT:    s_mov_b32 s4, s6
340; GFX6-NEXT:    s_mov_b32 s5, s7
341; GFX6-NEXT:    s_mov_b32 s6, s8
342; GFX6-NEXT:    s_mov_b32 s7, s9
343; GFX6-NEXT:    s_mov_b32 s8, s10
344; GFX6-NEXT:    s_mov_b32 s9, s11
345; GFX6-NEXT:    s_mov_b32 s10, s12
346; GFX6-NEXT:    s_mov_b32 s11, s13
347; GFX6-NEXT:    s_wqm_b64 exec, exec
348; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
349; GFX6-NEXT:    image_gather4_c_b_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0x1
350; GFX6-NEXT:    s_waitcnt vmcnt(0)
351; GFX6-NEXT:    ; return to shader part epilog
352;
353; GFX10-LABEL: gather4_c_b_cl_o_2d:
354; GFX10:       ; %bb.0: ; %main_body
355; GFX10-NEXT:    s_mov_b32 s1, exec_lo
356; GFX10-NEXT:    s_mov_b32 s0, s2
357; GFX10-NEXT:    s_wqm_b32 exec_lo, exec_lo
358; GFX10-NEXT:    s_and_b32 exec_lo, exec_lo, s1
359; GFX10-NEXT:    s_mov_b32 s1, s3
360; GFX10-NEXT:    s_mov_b32 s2, s4
361; GFX10-NEXT:    s_mov_b32 s3, s5
362; GFX10-NEXT:    s_mov_b32 s4, s6
363; GFX10-NEXT:    s_mov_b32 s5, s7
364; GFX10-NEXT:    s_mov_b32 s6, s8
365; GFX10-NEXT:    s_mov_b32 s7, s9
366; GFX10-NEXT:    s_mov_b32 s8, s10
367; GFX10-NEXT:    s_mov_b32 s9, s11
368; GFX10-NEXT:    s_mov_b32 s10, s12
369; GFX10-NEXT:    s_mov_b32 s11, s13
370; GFX10-NEXT:    image_gather4_c_b_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
371; GFX10-NEXT:    s_waitcnt vmcnt(0)
372; GFX10-NEXT:    ; return to shader part epilog
373main_body:
374  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
375  ret <4 x float> %v
376}
377
378define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
379; GFX6-LABEL: gather4_l_o_2d:
380; GFX6:       ; %bb.0: ; %main_body
381; GFX6-NEXT:    s_mov_b32 s0, s2
382; GFX6-NEXT:    s_mov_b32 s1, s3
383; GFX6-NEXT:    s_mov_b32 s2, s4
384; GFX6-NEXT:    s_mov_b32 s3, s5
385; GFX6-NEXT:    s_mov_b32 s4, s6
386; GFX6-NEXT:    s_mov_b32 s5, s7
387; GFX6-NEXT:    s_mov_b32 s6, s8
388; GFX6-NEXT:    s_mov_b32 s7, s9
389; GFX6-NEXT:    s_mov_b32 s8, s10
390; GFX6-NEXT:    s_mov_b32 s9, s11
391; GFX6-NEXT:    s_mov_b32 s10, s12
392; GFX6-NEXT:    s_mov_b32 s11, s13
393; GFX6-NEXT:    image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
394; GFX6-NEXT:    s_waitcnt vmcnt(0)
395; GFX6-NEXT:    ; return to shader part epilog
396;
397; GFX10-LABEL: gather4_l_o_2d:
398; GFX10:       ; %bb.0: ; %main_body
399; GFX10-NEXT:    s_mov_b32 s0, s2
400; GFX10-NEXT:    s_mov_b32 s1, s3
401; GFX10-NEXT:    s_mov_b32 s2, s4
402; GFX10-NEXT:    s_mov_b32 s3, s5
403; GFX10-NEXT:    s_mov_b32 s4, s6
404; GFX10-NEXT:    s_mov_b32 s5, s7
405; GFX10-NEXT:    s_mov_b32 s6, s8
406; GFX10-NEXT:    s_mov_b32 s7, s9
407; GFX10-NEXT:    s_mov_b32 s8, s10
408; GFX10-NEXT:    s_mov_b32 s9, s11
409; GFX10-NEXT:    s_mov_b32 s10, s12
410; GFX10-NEXT:    s_mov_b32 s11, s13
411; GFX10-NEXT:    image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
412; GFX10-NEXT:    s_waitcnt vmcnt(0)
413; GFX10-NEXT:    ; return to shader part epilog
414main_body:
415  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
416  ret <4 x float> %v
417}
418
419define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
420; GFX6-LABEL: gather4_c_l_o_2d:
421; GFX6:       ; %bb.0: ; %main_body
422; GFX6-NEXT:    s_mov_b32 s0, s2
423; GFX6-NEXT:    s_mov_b32 s1, s3
424; GFX6-NEXT:    s_mov_b32 s2, s4
425; GFX6-NEXT:    s_mov_b32 s3, s5
426; GFX6-NEXT:    s_mov_b32 s4, s6
427; GFX6-NEXT:    s_mov_b32 s5, s7
428; GFX6-NEXT:    s_mov_b32 s6, s8
429; GFX6-NEXT:    s_mov_b32 s7, s9
430; GFX6-NEXT:    s_mov_b32 s8, s10
431; GFX6-NEXT:    s_mov_b32 s9, s11
432; GFX6-NEXT:    s_mov_b32 s10, s12
433; GFX6-NEXT:    s_mov_b32 s11, s13
434; GFX6-NEXT:    image_gather4_c_l_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
435; GFX6-NEXT:    s_waitcnt vmcnt(0)
436; GFX6-NEXT:    ; return to shader part epilog
437;
438; GFX10-LABEL: gather4_c_l_o_2d:
439; GFX10:       ; %bb.0: ; %main_body
440; GFX10-NEXT:    s_mov_b32 s0, s2
441; GFX10-NEXT:    s_mov_b32 s1, s3
442; GFX10-NEXT:    s_mov_b32 s2, s4
443; GFX10-NEXT:    s_mov_b32 s3, s5
444; GFX10-NEXT:    s_mov_b32 s4, s6
445; GFX10-NEXT:    s_mov_b32 s5, s7
446; GFX10-NEXT:    s_mov_b32 s6, s8
447; GFX10-NEXT:    s_mov_b32 s7, s9
448; GFX10-NEXT:    s_mov_b32 s8, s10
449; GFX10-NEXT:    s_mov_b32 s9, s11
450; GFX10-NEXT:    s_mov_b32 s10, s12
451; GFX10-NEXT:    s_mov_b32 s11, s13
452; GFX10-NEXT:    image_gather4_c_l_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
453; GFX10-NEXT:    s_waitcnt vmcnt(0)
454; GFX10-NEXT:    ; return to shader part epilog
455main_body:
456  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
457  ret <4 x float> %v
458}
459
460define amdgpu_ps <4 x float> @gather4_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
461; GFX6-LABEL: gather4_lz_o_2d:
462; GFX6:       ; %bb.0: ; %main_body
463; GFX6-NEXT:    s_mov_b32 s0, s2
464; GFX6-NEXT:    s_mov_b32 s1, s3
465; GFX6-NEXT:    s_mov_b32 s2, s4
466; GFX6-NEXT:    s_mov_b32 s3, s5
467; GFX6-NEXT:    s_mov_b32 s4, s6
468; GFX6-NEXT:    s_mov_b32 s5, s7
469; GFX6-NEXT:    s_mov_b32 s6, s8
470; GFX6-NEXT:    s_mov_b32 s7, s9
471; GFX6-NEXT:    s_mov_b32 s8, s10
472; GFX6-NEXT:    s_mov_b32 s9, s11
473; GFX6-NEXT:    s_mov_b32 s10, s12
474; GFX6-NEXT:    s_mov_b32 s11, s13
475; GFX6-NEXT:    image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
476; GFX6-NEXT:    s_waitcnt vmcnt(0)
477; GFX6-NEXT:    ; return to shader part epilog
478;
479; GFX10-LABEL: gather4_lz_o_2d:
480; GFX10:       ; %bb.0: ; %main_body
481; GFX10-NEXT:    s_mov_b32 s0, s2
482; GFX10-NEXT:    s_mov_b32 s1, s3
483; GFX10-NEXT:    s_mov_b32 s2, s4
484; GFX10-NEXT:    s_mov_b32 s3, s5
485; GFX10-NEXT:    s_mov_b32 s4, s6
486; GFX10-NEXT:    s_mov_b32 s5, s7
487; GFX10-NEXT:    s_mov_b32 s6, s8
488; GFX10-NEXT:    s_mov_b32 s7, s9
489; GFX10-NEXT:    s_mov_b32 s8, s10
490; GFX10-NEXT:    s_mov_b32 s9, s11
491; GFX10-NEXT:    s_mov_b32 s10, s12
492; GFX10-NEXT:    s_mov_b32 s11, s13
493; GFX10-NEXT:    image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
494; GFX10-NEXT:    s_waitcnt vmcnt(0)
495; GFX10-NEXT:    ; return to shader part epilog
496main_body:
497  %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
498  ret <4 x float> %v
499}
500
501define amdgpu_ps <4 x float> @gather4_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
502; GFX6-LABEL: gather4_c_lz_o_2d:
503; GFX6:       ; %bb.0: ; %main_body
504; GFX6-NEXT:    s_mov_b32 s0, s2
505; GFX6-NEXT:    s_mov_b32 s1, s3
506; GFX6-NEXT:    s_mov_b32 s2, s4
507; GFX6-NEXT:    s_mov_b32 s3, s5
508; GFX6-NEXT:    s_mov_b32 s4, s6
509; GFX6-NEXT:    s_mov_b32 s5, s7
510; GFX6-NEXT:    s_mov_b32 s6, s8
511; GFX6-NEXT:    s_mov_b32 s7, s9
512; GFX6-NEXT:    s_mov_b32 s8, s10
513; GFX6-NEXT:    s_mov_b32 s9, s11
514; GFX6-NEXT:    s_mov_b32 s10, s12
515; GFX6-NEXT:    s_mov_b32 s11, s13
516; GFX6-NEXT:    image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
517; GFX6-NEXT:    s_waitcnt vmcnt(0)
518; GFX6-NEXT:    ; return to shader part epilog
519;
520; GFX10-LABEL: gather4_c_lz_o_2d:
521; GFX10:       ; %bb.0: ; %main_body
522; GFX10-NEXT:    s_mov_b32 s0, s2
523; GFX10-NEXT:    s_mov_b32 s1, s3
524; GFX10-NEXT:    s_mov_b32 s2, s4
525; GFX10-NEXT:    s_mov_b32 s3, s5
526; GFX10-NEXT:    s_mov_b32 s4, s6
527; GFX10-NEXT:    s_mov_b32 s5, s7
528; GFX10-NEXT:    s_mov_b32 s6, s8
529; GFX10-NEXT:    s_mov_b32 s7, s9
530; GFX10-NEXT:    s_mov_b32 s8, s10
531; GFX10-NEXT:    s_mov_b32 s9, s11
532; GFX10-NEXT:    s_mov_b32 s10, s12
533; GFX10-NEXT:    s_mov_b32 s11, s13
534; GFX10-NEXT:    image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
535; GFX10-NEXT:    s_waitcnt vmcnt(0)
536; GFX10-NEXT:    ; return to shader part epilog
537main_body:
538  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
539  ret <4 x float> %v
540}
541
542declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
543declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
544declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
545declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
546declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
547declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
548declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
549declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
550declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
551declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
552declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
553declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
554
555attributes #0 = { nounwind readonly }
556