xref: /llvm-project/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.d16.dim.ll (revision 9e9907f1cfa424366fba58d9520f9305b537cec9)
1; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED,GFX89 %s
2; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX81,GFX89 %s
3; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX89 %s
4; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
5; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
6; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX12 %s
7
8; GCN-LABEL: {{^}}image_load_f16:
9; GFX89: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}}
10; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
11; GFX12: image_load v0, [v0, v1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D d16
12define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
13main_body:
14  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
15  ret half %tex
16}
17
18; GCN-LABEL: {{^}}image_load_v2f16:
19; UNPACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
20; PACKED: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
21; GFX81: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
22; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
23; GFX12: image_load v0, [v0, v1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D d16
24define amdgpu_ps float @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
25main_body:
26  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
27  %r = bitcast <2 x half> %tex to float
28  ret float %r
29}
30
31; GCN-LABEL: {{^}}image_load_v3f16:
32; UNPACKED: image_load v[0:2], v[0:1], s[0:7] dmask:0x7 unorm d16{{$}}
33; PACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 unorm d16{{$}}
34; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
35; GFX12: image_load v[0:1], [v0, v1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D d16
36define amdgpu_ps <2 x float> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
37main_body:
38  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
39  %ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
40  %r = bitcast <4 x half> %ext to <2 x float>
41  ret <2 x float> %r
42}
43
44; GCN-LABEL: {{^}}image_load_v4f16:
45; UNPACKED: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
46; PACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
47; GFX81: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
48; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}}
49; GFX12: image_load v[0:1], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D d16
50define amdgpu_ps <2 x float> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
51main_body:
52  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
53  %r = bitcast <4 x half> %tex to <2 x float>
54  ret <2 x float> %r
55}
56
57; GCN-LABEL: {{^}}image_load_mip_v4f16:
58; UNPACKED: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm d16{{$}}
59; PACKED: image_load_mip v[0:1], v[0:2], s[0:7] dmask:0xf unorm d16{{$}}
60; GFX81: image_load_mip v[0:1], v[0:2], s[0:7] dmask:0xf unorm d16{{$}}
61; GFX10: image_load_mip v[0:1], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}}
62; GFX12: image_load_mip v[0:1], [v0, v1, v2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D d16
63define amdgpu_ps <2 x float> @image_load_mip_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
64main_body:
65  %tex = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
66  %r = bitcast <4 x half> %tex to <2 x float>
67  ret <2 x float> %r
68}
69
70; GCN-LABEL: {{^}}image_load_3d_v2f16:
71; UNPACKED: image_load v[0:1], v[0:2], s[0:7] dmask:0x3 unorm d16{{$}}
72; PACKED: image_load v0, v[0:2], s[0:7] dmask:0x3 unorm d16{{$}}
73; GFX81: image_load v0, v[0:2], s[0:7] dmask:0x3 unorm d16{{$}}
74; GFX10: image_load v0, v[0:2], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm d16{{$}}
75; GFX12: image_load v0, [v0, v1, v2], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D d16
76define amdgpu_ps float @image_load_3d_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
77main_body:
78  %tex = call <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32 3, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
79  %x = bitcast <2 x half> %tex to float
80  ret float %x
81}
82
83
84; GCN-LABEL: {{^}}image_load_3d_v3f16:
85; UNPACKED: image_load v[0:2], v[0:2], s[0:7] dmask:0x7 unorm d16
86; PACKED: image_load v[0:1], v[0:2], s[0:7] dmask:0x7 unorm d16
87; GFX81: image_load v[0:1], v[0:2], s[0:7] dmask:0x7 unorm d16
88; GFX10: image_load v[0:1], v[0:2], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm d16{{$}}
89; GFX12: image_load v[0:1], [v0, v1, v2], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D d16
90define amdgpu_ps <2 x float> @image_load_3d_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
91main_body:
92  %tex = call <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32 7, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
93  %ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
94  %res = bitcast <4 x half> %ext to <2 x float>
95  ret <2 x float> %res
96}
97
98; GCN-LABEL: {{^}}image_store_f16
99; GFX89: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}}
100; GFX10: image_store v2, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
101; GFX12: image_store v2, [v0, v1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D d16
102define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) {
103main_body:
104  call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
105  ret void
106}
107
108; GCN-LABEL: {{^}}image_store_v2f16
109; UNPACKED: v_lshrrev_b32_e32
110; UNPACKED: v_and_b32_e32
111; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
112; PACKED: image_store v2, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
113; GFX81: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
114; GFX10: image_store v2, v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
115; GFX12: image_store v2, [v0, v1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D d16
116define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, float %in) {
117main_body:
118  %data = bitcast float %in to <2 x half>
119  call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %data, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
120  ret void
121}
122
123; GCN-LABEL: {{^}}image_store_v3f16:
124; UNPACKED: image_store v[2:4], v[0:1], s[0:7] dmask:0x7 unorm d16
125; PACKED: image_store v[2:3], v[0:1], s[0:7] dmask:0x7 unorm d16
126; GFX81: image_store v[2:4], v[0:1], s[0:7] dmask:0x7 unorm d16
127; GFX10: image_store v[2:3], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
128; GFX12: image_store v[2:3], [v0, v1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D d16
129define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
130main_body:
131  %r = bitcast <2 x float> %in to <4 x half>
132  %data = shufflevector <4 x half> %r, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
133  call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %data, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
134  ret void
135}
136
137; GCN-LABEL: {{^}}image_store_v4f16
138; UNPACKED: v_lshrrev_b32_e32
139; UNPACKED: v_and_b32_e32
140; UNPACKED: v_lshrrev_b32_e32
141; UNPACKED: v_and_b32_e32
142; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
143; PACKED: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
144; GFX81: image_store v[2:5], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
145; GFX10: image_store v[2:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}}
146; GFX12: image_store v[2:3], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D d16
147define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
148main_body:
149  %data = bitcast <2 x float> %in to <4 x half>
150  call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %data, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
151  ret void
152}
153
154; GCN-LABEL: {{^}}image_store_mip_1d_v4f16
155; UNPACKED: v_lshrrev_b32_e32
156; UNPACKED: v_and_b32_e32
157; UNPACKED: v_lshrrev_b32_e32
158; UNPACKED: v_and_b32_e32
159; UNPACKED: image_store_mip v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
160; PACKED: image_store_mip v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
161; GFX81: image_store_mip v[2:5], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
162; GFX10: image_store_mip v[2:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm d16{{$}}
163; GFX12: image_store_mip v[2:3], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D d16
164define amdgpu_ps void @image_store_mip_1d_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %mip, <2 x float> %in) {
165main_body:
166  %data = bitcast <2 x float> %in to <4 x half>
167  call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half> %data, i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
168  ret void
169}
170
171declare half @llvm.amdgcn.image.load.2d.f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
172declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
173declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
174declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
175declare <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
176declare <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
177declare <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
178
179declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32, i32, i32, <8 x i32>, i32, i32) #0
180declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
181declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
182declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
183declare void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
184declare void @llvm.amdgcn.image.store.3d.v2f16.i32(<2 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
185declare void @llvm.amdgcn.image.store.3d.v3f16.i32(<3 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
186
187attributes #0 = { nounwind }
188attributes #1 = { nounwind readonly }
189attributes #2 = { nounwind readnone }
190