xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.gfx90a.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,SDAG %s
2; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -early-live-intervals -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,SDAG %s
3; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX90A,GISEL %s
4
5; GFX90A-LABEL: {{^}}sample_1d:
6; GFX90A-NOT: s_wqm_b64
7; GFX90A:     image_sample v[{{[0-9:]+}}], v{{[0-9]+}}, s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf
8define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
9main_body:
10  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
11  ret <4 x float> %v
12}
13
14; GFX90A-LABEL: {{^}}sample_1d_lwe:
15; GFX90A-NOT: s_wqm_b64
16; GFX90A:     image_sample v[{{[0-9:]+}}], v{{[0-9]+}}, s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf lwe
17define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) {
18main_body:
19  %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0)
20  %v.vec = extractvalue {<4 x float>, i32} %v, 0
21  %v.err = extractvalue {<4 x float>, i32} %v, 1
22  store i32 %v.err, ptr addrspace(1) %out, align 4
23  ret <4 x float> %v.vec
24}
25
26; GFX90A-LABEL: {{^}}sample_2d:
27; GFX90A-NOT: s_wqm_b64
28; GFX90A:     image_sample v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf
29define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
30main_body:
31  %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
32  ret <4 x float> %v
33}
34
35; GFX90A-LABEL: {{^}}sample_3d:
36; GFX90A-NOT: s_wqm_b64
37; GFX90A:     image_sample v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf
38define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) {
39main_body:
40  %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
41  ret <4 x float> %v
42}
43
44; GFX90A-LABEL: {{^}}sample_cube:
45; GFX90A-NOT: s_wqm_b64
46; GFX90A:     image_sample v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf da
47define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
48main_body:
49  %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
50  ret <4 x float> %v
51}
52
53; GFX90A-LABEL: {{^}}sample_1darray:
54; GFX90A-NOT: s_wqm_b64
55; GFX90A:     image_sample v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf da
56define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) {
57main_body:
58  %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
59  ret <4 x float> %v
60}
61
62; GFX90A-LABEL: {{^}}sample_1d_unorm:
63; GFX90A-NOT: s_wqm_b64
64; GFX90A:     image_sample v[{{[0-9:]+}}], v{{[0-9]+}}, s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf unorm
65define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
66main_body:
67  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0)
68  ret <4 x float> %v
69}
70
71; Address register must be even aligned.
72
73; GFX90A-LABEL: {{^}}sample_1d_addr_align:
74; GFX90A: v_mov_b32_e32 [[VADDR:v[0-9]?[02468]]], v1
75; SDAG:   image_sample v{{[0-9]+}}, [[VADDR]], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0x1
76; GISEL:  image_sample v[{{[0-9:]+}}], [[VADDR]], s[{{[0-9:]+}}], s[{{[0-9:]+}}] dmask:0xf
77define amdgpu_ps float @sample_1d_addr_align(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, <2 x float> %s) {
78main_body:
79  %s1 = extractelement <2 x float> %s, i32 1
80  %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s1, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
81  %v1 = extractelement <4 x float> %v, i32 0
82  ret float %v1
83}
84
85declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
86declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32)
87declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
88declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
89declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
90declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32)
91