xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.cd.a16.dim.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
4
; sample_cd_1d: calls @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16 with three
; half operands (%dsdh, %dsdv, %s) and returns the <4 x float> result.
; Expected output for both RUN lines is one image_sample_cd instruction with
; the a16 modifier reading v[0:2], followed by s_waitcnt vmcnt(0); the gfx1010
; form additionally has the _g16 suffix and dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated (see the header of this file); regenerate
; them with utils/update_llc_test_checks.py rather than editing by hand.
5define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
6; GFX9-LABEL: sample_cd_1d:
7; GFX9:       ; %bb.0: ; %main_body
8; GFX9-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
9; GFX9-NEXT:    s_waitcnt vmcnt(0)
10; GFX9-NEXT:    ; return to shader part epilog
11;
12; GFX10-LABEL: sample_cd_1d:
13; GFX10:       ; %bb.0: ; %main_body
14; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
15; GFX10-NEXT:    s_waitcnt vmcnt(0)
16; GFX10-NEXT:    ; return to shader part epilog
17main_body:
18  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
19  ret <4 x float> %v
20}
21
; sample_cd_2d: calls @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16 with six
; half operands (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t).
; Expected output combines the six input VGPRs pairwise with three v_perm_b32
; instructions using selector 0x5040100 (presumably packing two 16-bit values
; per 32-bit register -- confirm against the ISA documentation), then issues
; the image instruction on v[2:4]. For gfx900 the selector is first moved into
; s12; for gfx1010 it appears as a literal operand.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
22define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
23; GFX9-LABEL: sample_cd_2d:
24; GFX9:       ; %bb.0: ; %main_body
25; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
26; GFX9-NEXT:    v_perm_b32 v4, v5, v4, s12
27; GFX9-NEXT:    v_perm_b32 v3, v3, v2, s12
28; GFX9-NEXT:    v_perm_b32 v2, v1, v0, s12
29; GFX9-NEXT:    image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
30; GFX9-NEXT:    s_waitcnt vmcnt(0)
31; GFX9-NEXT:    ; return to shader part epilog
32;
33; GFX10-LABEL: sample_cd_2d:
34; GFX10:       ; %bb.0: ; %main_body
35; GFX10-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
36; GFX10-NEXT:    v_perm_b32 v3, v3, v2, 0x5040100
37; GFX10-NEXT:    v_perm_b32 v2, v1, v0, 0x5040100
38; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
39; GFX10-NEXT:    s_waitcnt vmcnt(0)
40; GFX10-NEXT:    ; return to shader part epilog
41main_body:
42  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
43  ret <4 x float> %v
44}
45
; sample_c_cd_1d: calls @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16 with a
; float %zcompare followed by three half operands (%dsdh, %dsdv, %s).
; Expected output for both RUN lines is one image_sample_c_cd instruction with
; the a16 modifier reading v[0:3], with no preceding register shuffling; the
; gfx1010 form additionally has the _g16 suffix and dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
46define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
47; GFX9-LABEL: sample_c_cd_1d:
48; GFX9:       ; %bb.0: ; %main_body
49; GFX9-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
50; GFX9-NEXT:    s_waitcnt vmcnt(0)
51; GFX9-NEXT:    ; return to shader part epilog
52;
53; GFX10-LABEL: sample_c_cd_1d:
54; GFX10:       ; %bb.0: ; %main_body
55; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
56; GFX10-NEXT:    s_waitcnt vmcnt(0)
57; GFX10-NEXT:    ; return to shader part epilog
58main_body:
59  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
60  ret <4 x float> %v
61}
62
; sample_c_cd_2d: calls @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16 with a
; float %zcompare followed by six half operands.
; Expected output combines v1..v6 pairwise with three v_perm_b32 instructions
; (selector 0x5040100; presumably a 16-bit pack -- confirm against the ISA
; documentation). The two targets differ in how the address is passed:
;   - gfx900 copies v3 and v2 aside first so the results land in the
;     contiguous range v[0:3];
;   - gfx1010 needs no copies and passes the register list [v0, v1, v3, v5].
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
63define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
64; GFX9-LABEL: sample_c_cd_2d:
65; GFX9:       ; %bb.0: ; %main_body
66; GFX9-NEXT:    v_mov_b32_e32 v7, v3
67; GFX9-NEXT:    v_mov_b32_e32 v8, v2
68; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
69; GFX9-NEXT:    v_perm_b32 v3, v6, v5, s12
70; GFX9-NEXT:    v_perm_b32 v2, v4, v7, s12
71; GFX9-NEXT:    v_perm_b32 v1, v8, v1, s12
72; GFX9-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
73; GFX9-NEXT:    s_waitcnt vmcnt(0)
74; GFX9-NEXT:    ; return to shader part epilog
75;
76; GFX10-LABEL: sample_c_cd_2d:
77; GFX10:       ; %bb.0: ; %main_body
78; GFX10-NEXT:    v_perm_b32 v5, v6, v5, 0x5040100
79; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
80; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
81; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
82; GFX10-NEXT:    s_waitcnt vmcnt(0)
83; GFX10-NEXT:    ; return to shader part epilog
84main_body:
85  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
86  ret <4 x float> %v
87}
88
; sample_cd_cl_1d: calls @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16 with
; four half operands (%dsdh, %dsdv, %s, %clamp).
; Expected output combines v2 and v3 (the registers holding %s and %clamp) with
; one v_perm_b32 using selector 0x5040100 (presumably a 16-bit pack -- confirm
; against the ISA documentation), then issues image_sample_cd_cl on v[0:2] with
; the a16 modifier. The gfx1010 form has the _g16 suffix and
; dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
89define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
90; GFX9-LABEL: sample_cd_cl_1d:
91; GFX9:       ; %bb.0: ; %main_body
92; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
93; GFX9-NEXT:    v_perm_b32 v2, v3, v2, s12
94; GFX9-NEXT:    image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
95; GFX9-NEXT:    s_waitcnt vmcnt(0)
96; GFX9-NEXT:    ; return to shader part epilog
97;
98; GFX10-LABEL: sample_cd_cl_1d:
99; GFX10:       ; %bb.0: ; %main_body
100; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
101; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
102; GFX10-NEXT:    s_waitcnt vmcnt(0)
103; GFX10-NEXT:    ; return to shader part epilog
104main_body:
105  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
106  ret <4 x float> %v
107}
108
; sample_cd_cl_2d: calls @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16 with
; seven half operands (%dsdh, %dtdh, %dsdv, %dtdv, %s, %t, %clamp).
; Expected output combines v0..v5 pairwise with three v_perm_b32 instructions
; (selector 0x5040100; presumably a 16-bit pack -- confirm against the ISA
; documentation) and leaves v6, the register holding %clamp, untouched:
;   - gfx900 writes the results to v3..v5 so the address is the contiguous
;     range v[3:6];
;   - gfx1010 writes them in place and passes the list [v0, v2, v4, v6].
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
109define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
110; GFX9-LABEL: sample_cd_cl_2d:
111; GFX9:       ; %bb.0: ; %main_body
112; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
113; GFX9-NEXT:    v_perm_b32 v5, v5, v4, s12
114; GFX9-NEXT:    v_perm_b32 v4, v3, v2, s12
115; GFX9-NEXT:    v_perm_b32 v3, v1, v0, s12
116; GFX9-NEXT:    image_sample_cd_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
117; GFX9-NEXT:    s_waitcnt vmcnt(0)
118; GFX9-NEXT:    ; return to shader part epilog
119;
120; GFX10-LABEL: sample_cd_cl_2d:
121; GFX10:       ; %bb.0: ; %main_body
122; GFX10-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
123; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
124; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
125; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
126; GFX10-NEXT:    s_waitcnt vmcnt(0)
127; GFX10-NEXT:    ; return to shader part epilog
128main_body:
129  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
130  ret <4 x float> %v
131}
132
; sample_c_cd_cl_1d: calls @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16
; with a float %zcompare followed by four half operands (%dsdh, %dsdv, %s,
; %clamp).
; Expected output combines v3 and v4 (the registers holding %s and %clamp) with
; one v_perm_b32 using selector 0x5040100 (presumably a 16-bit pack -- confirm
; against the ISA documentation), then issues image_sample_c_cd_cl on v[0:3]
; with the a16 modifier. The gfx1010 form has the _g16 suffix and
; dim:SQ_RSRC_IMG_1D.
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
133define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
134; GFX9-LABEL: sample_c_cd_cl_1d:
135; GFX9:       ; %bb.0: ; %main_body
136; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
137; GFX9-NEXT:    v_perm_b32 v3, v4, v3, s12
138; GFX9-NEXT:    image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
139; GFX9-NEXT:    s_waitcnt vmcnt(0)
140; GFX9-NEXT:    ; return to shader part epilog
141;
142; GFX10-LABEL: sample_c_cd_cl_1d:
143; GFX10:       ; %bb.0: ; %main_body
144; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
145; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
146; GFX10-NEXT:    s_waitcnt vmcnt(0)
147; GFX10-NEXT:    ; return to shader part epilog
148main_body:
149  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
150  ret <4 x float> %v
151}
152
; sample_c_cd_cl_2d: calls @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16
; with a float %zcompare followed by seven half operands.
; Expected output combines v1..v6 pairwise with three v_perm_b32 instructions
; (selector 0x5040100; presumably a 16-bit pack -- confirm against the ISA
; documentation):
;   - gfx900 copies v7 to v11 and v0 to v7 and writes the combined values to
;     v8..v10, so the address is the contiguous range v[7:11];
;   - gfx1010 needs no copies and passes the list [v0, v1, v3, v5, v7].
; The check lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.
153define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
154; GFX9-LABEL: sample_c_cd_cl_2d:
155; GFX9:       ; %bb.0: ; %main_body
156; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
157; GFX9-NEXT:    v_mov_b32_e32 v11, v7
158; GFX9-NEXT:    v_mov_b32_e32 v7, v0
159; GFX9-NEXT:    v_perm_b32 v10, v6, v5, s12
160; GFX9-NEXT:    v_perm_b32 v9, v4, v3, s12
161; GFX9-NEXT:    v_perm_b32 v8, v2, v1, s12
162; GFX9-NEXT:    image_sample_c_cd_cl v[0:3], v[7:11], s[0:7], s[8:11] dmask:0xf a16
163; GFX9-NEXT:    s_waitcnt vmcnt(0)
164; GFX9-NEXT:    ; return to shader part epilog
165;
166; GFX10-LABEL: sample_c_cd_cl_2d:
167; GFX10:       ; %bb.0: ; %main_body
168; GFX10-NEXT:    v_perm_b32 v5, v6, v5, 0x5040100
169; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
170; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
171; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
172; GFX10-NEXT:    s_waitcnt vmcnt(0)
173; GFX10-NEXT:    ; return to shader part epilog
174main_body:
175  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
176  ret <4 x float> %v
177}
178
; Declarations of the eight image-sample intrinsics called by the functions in
; this file, in the same order as the functions that use them. Each returns
; <4 x float> and references attribute group #1.
179declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
180declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
181declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
182declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
183declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
184declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
185declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
186declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
187
; Attribute groups. Among the lines visible in this file only #1 is referenced
; (by the intrinsic declarations); #0 and #2 have no visible users.
188attributes #0 = { nounwind }
189attributes #1 = { nounwind readonly }
190attributes #2 = { nounwind readnone }
191