xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.load.a16.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
2; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
3; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10 %s
4; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX12 %s
5
; 1D image load with a 16-bit coordinate and dmask operand 1; the checks
; expect a single result register and the a16 modifier on every target.
; GCN-LABEL: {{^}}load.f32.1d:
; GFX9: image_load v0, v0, s[0:7] dmask:0x1 unorm a16
; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D unorm a16
; GFX12: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_1D a16
define amdgpu_ps <4 x float> @load.f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
  ; Only element 0 of %coords is used; it is passed as the x coordinate.
  %coord.x = extractelement <2 x i16> %coords, i32 0
  %texel = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 1, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
16
; 1D image load with a 16-bit coordinate and dmask operand 3; the checks
; expect a two-register result (v[0:1]).
; GCN-LABEL: {{^}}load.v2f32.1d:
; GFX9: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm a16
; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D unorm a16
; GFX12: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_1D a16
define amdgpu_ps <4 x float> @load.v2f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
  ; Only element 0 of %coords is used; it is passed as the x coordinate.
  %coord.x = extractelement <2 x i16> %coords, i32 0
  %texel = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 3, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
27
; 1D image load with a 16-bit coordinate and dmask operand 7; the checks
; expect a three-register result (v[0:2]).
; GCN-LABEL: {{^}}load.v3f32.1d:
; GFX9: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm a16
; GFX10: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D unorm a16
; GFX12: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_1D a16
define amdgpu_ps <4 x float> @load.v3f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
  ; Only element 0 of %coords is used; it is passed as the x coordinate.
  %coord.x = extractelement <2 x i16> %coords, i32 0
  %texel = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 7, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
38
; 1D image load with a 16-bit coordinate and dmask operand 15; the checks
; expect a four-register result (v[0:3]).
; GCN-LABEL: {{^}}load.v4f32.1d:
; GFX9: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm a16
; GFX12: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D a16
define amdgpu_ps <4 x float> @load.v4f32.1d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
  ; Only element 0 of %coords is used; it is passed as the x coordinate.
  %coord.x = extractelement <2 x i16> %coords, i32 0
  %texel = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32 15, i16 %coord.x, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
49
; 2D image load with 16-bit coordinates and dmask operand 1; the checks
; expect both coordinates to be supplied through a single address register (v0).
; GCN-LABEL: {{^}}load.f32.2d:
; GFX9: image_load v0, v0, s[0:7] dmask:0x1 unorm a16
; GFX10: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm a16
; GFX12: image_load v0, v0, s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D a16
define amdgpu_ps <4 x float> @load.f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
  ; x and y are the two halves of the incoming <2 x i16> argument.
  %coord.x = extractelement <2 x i16> %coords, i32 0
  %coord.y = extractelement <2 x i16> %coords, i32 1
  %texel = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 1, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
61
; 2D image load with 16-bit coordinates and dmask operand 3; the checks
; expect a two-register result and a single address register (v0).
; GCN-LABEL: {{^}}load.v2f32.2d:
; GFX9: image_load v[0:1], v0, s[0:7] dmask:0x3 unorm a16
; GFX10: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm a16
; GFX12: image_load v[0:1], v0, s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D a16
define amdgpu_ps <4 x float> @load.v2f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
  ; x and y are the two halves of the incoming <2 x i16> argument.
  %coord.x = extractelement <2 x i16> %coords, i32 0
  %coord.y = extractelement <2 x i16> %coords, i32 1
  %texel = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 3, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
73
; 2D image load with 16-bit coordinates and dmask operand 7; the checks
; expect a three-register result and a single address register (v0).
; GCN-LABEL: {{^}}load.v3f32.2d:
; GFX9: image_load v[0:2], v0, s[0:7] dmask:0x7 unorm a16
; GFX10: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm a16
; GFX12: image_load v[0:2], v0, s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D a16
define amdgpu_ps <4 x float> @load.v3f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
  ; x and y are the two halves of the incoming <2 x i16> argument.
  %coord.x = extractelement <2 x i16> %coords, i32 0
  %coord.y = extractelement <2 x i16> %coords, i32 1
  %texel = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 7, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
85
; 2D image load with 16-bit coordinates and dmask operand 15; the checks
; expect a four-register result and a single address register (v0).
; GCN-LABEL: {{^}}load.v4f32.2d:
; GFX9: image_load v[0:3], v0, s[0:7] dmask:0xf unorm a16
; GFX10: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm a16
; GFX12: image_load v[0:3], v0, s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D a16
define amdgpu_ps <4 x float> @load.v4f32.2d(<8 x i32> inreg %rsrc, <2 x i16> %coords) {
main_body:
  ; x and y are the two halves of the incoming <2 x i16> argument.
  %coord.x = extractelement <2 x i16> %coords, i32 0
  %coord.y = extractelement <2 x i16> %coords, i32 1
  %texel = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32 15, i16 %coord.x, i16 %coord.y, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
97
; 3D image load with 16-bit coordinates and dmask operand 1; the checks
; expect two address registers: a v[0:1] range for the gfx9/gfx10 prefixes
; and a [v0, v1] list for the gfx12 prefix.
; GCN-LABEL: {{^}}load.f32.3d:
; GFX9: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm a16
; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D unorm a16
; GFX12: image_load v0, [v0, v1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_3D a16
define amdgpu_ps <4 x float> @load.f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
main_body:
  ; x/y come from %coords_lo; z is element 0 of %coords_hi (element 1 is unused).
  %coord.x = extractelement <2 x i16> %coords_lo, i32 0
  %coord.y = extractelement <2 x i16> %coords_lo, i32 1
  %coord.z = extractelement <2 x i16> %coords_hi, i32 0
  %texel = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 1, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
110
; 3D image load with 16-bit coordinates and dmask operand 3; the checks
; expect a two-register result and two address registers.
; GCN-LABEL: {{^}}load.v2f32.3d:
; GFX9: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 unorm a16
; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm a16
; GFX12: image_load v[0:1], [v0, v1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D a16
define amdgpu_ps <4 x float> @load.v2f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
main_body:
  ; x/y come from %coords_lo; z is element 0 of %coords_hi (element 1 is unused).
  %coord.x = extractelement <2 x i16> %coords_lo, i32 0
  %coord.y = extractelement <2 x i16> %coords_lo, i32 1
  %coord.z = extractelement <2 x i16> %coords_hi, i32 0
  %texel = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 3, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
123
; 3D image load with 16-bit coordinates and dmask operand 7; the checks
; expect a three-register result and two address registers.
; GCN-LABEL: {{^}}load.v3f32.3d:
; GFX9: image_load v[0:2], v[0:1], s[0:7] dmask:0x7 unorm a16
; GFX10: image_load v[0:2], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm a16
; GFX12: image_load v[0:2], [v0, v1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D a16
define amdgpu_ps <4 x float> @load.v3f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
main_body:
  ; x/y come from %coords_lo; z is element 0 of %coords_hi (element 1 is unused).
  %coord.x = extractelement <2 x i16> %coords_lo, i32 0
  %coord.y = extractelement <2 x i16> %coords_lo, i32 1
  %coord.z = extractelement <2 x i16> %coords_hi, i32 0
  %texel = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 7, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
136
; 3D image load with 16-bit coordinates and dmask operand 15; the checks
; expect a four-register result and two address registers.
; GCN-LABEL: {{^}}load.v4f32.3d:
; GFX9: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16
; GFX10: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm a16
; GFX12: image_load v[0:3], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D a16
define amdgpu_ps <4 x float> @load.v4f32.3d(<8 x i32> inreg %rsrc, <2 x i16> %coords_lo, <2 x i16> %coords_hi) {
main_body:
  ; x/y come from %coords_lo; z is element 0 of %coords_hi (element 1 is unused).
  %coord.x = extractelement <2 x i16> %coords_lo, i32 0
  %coord.y = extractelement <2 x i16> %coords_lo, i32 1
  %coord.z = extractelement <2 x i16> %coords_hi, i32 0
  %texel = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32 15, i16 %coord.x, i16 %coord.y, i16 %coord.z, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %texel
}
149
; Declarations of the a16 (i16-coordinate) image load intrinsics called by the
; tests above, one per dimensionality (1d, 2d, 3d). They reference attribute
; group #1 because it is defined below; no group #2 is defined in the visible
; part of this file, so a #2 reference would be a dangling attribute-group ID.
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i16(i32, i16, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i16(i32, i16, i16, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i16(i32, i16, i16, i16, <8 x i32>, i32, i32) #1

; Attribute groups. #0 is not referenced in the visible part of this file; it
; is kept so that existing group numbering stays stable.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
156