xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.image.store.2d.d16.ll (revision 26b14aedb7a936d43fa753cf9f311524f2c7ad70)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -o - %s | FileCheck -check-prefix=UNPACKED %s
3; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -o - %s | FileCheck -check-prefix=GFX81 %s
4; FIXME: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
5; FIXME: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
6; FIXME: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s | FileCheck -check-prefix=GFX11 %s
7; FIXME: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -o - %s | FileCheck -check-prefix=GFX12 %s
8
; Scalar case: stores one half value through the 2D image-store intrinsic.
; The immediate second operand (i32 1) shows up as dmask:0x1 in the expected
; output. The expected instruction sequence is the same under both check
; prefixes (UNPACKED = -mcpu=tonga, GFX81 = -mcpu=gfx810): the resource
; descriptor is copied from s[2:9] into s[0:7] and a single d16 image_store
; of v2 at coordinates v[0:1] is emitted.
9define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) {
10; UNPACKED-LABEL: image_store_f16:
11; UNPACKED:       ; %bb.0:
12; UNPACKED-NEXT:    s_mov_b32 s0, s2
13; UNPACKED-NEXT:    s_mov_b32 s1, s3
14; UNPACKED-NEXT:    s_mov_b32 s2, s4
15; UNPACKED-NEXT:    s_mov_b32 s3, s5
16; UNPACKED-NEXT:    s_mov_b32 s4, s6
17; UNPACKED-NEXT:    s_mov_b32 s5, s7
18; UNPACKED-NEXT:    s_mov_b32 s6, s8
19; UNPACKED-NEXT:    s_mov_b32 s7, s9
20; UNPACKED-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
21; UNPACKED-NEXT:    s_endpgm
22;
23; GFX81-LABEL: image_store_f16:
24; GFX81:       ; %bb.0:
25; GFX81-NEXT:    s_mov_b32 s0, s2
26; GFX81-NEXT:    s_mov_b32 s1, s3
27; GFX81-NEXT:    s_mov_b32 s2, s4
28; GFX81-NEXT:    s_mov_b32 s3, s5
29; GFX81-NEXT:    s_mov_b32 s4, s6
30; GFX81-NEXT:    s_mov_b32 s5, s7
31; GFX81-NEXT:    s_mov_b32 s6, s8
32; GFX81-NEXT:    s_mov_b32 s7, s9
33; GFX81-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
34; GFX81-NEXT:    s_endpgm
35  call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
36  ret void
37}
38
; Two-component case: stores a <2 x half> value with immediate operand 3
; (dmask:0x3 in the expected output).
; The two prefixes differ only in data preparation:
;   - UNPACKED (tonga) expects an extra v_lshrrev_b32 by 16 that derives v3
;     from v2 before storing v[2:3].
;   - GFX81 (gfx810) expects no VALU instruction before the store.
; NOTE(review): the UNPACKED shift presumably moves the high half into its own
; register for the unpacked d16 layout; confirm against the AMDGPU backend docs.
39define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x half> %in) {
40; UNPACKED-LABEL: image_store_v2f16:
41; UNPACKED:       ; %bb.0:
42; UNPACKED-NEXT:    s_mov_b32 s0, s2
43; UNPACKED-NEXT:    s_mov_b32 s1, s3
44; UNPACKED-NEXT:    s_mov_b32 s2, s4
45; UNPACKED-NEXT:    s_mov_b32 s3, s5
46; UNPACKED-NEXT:    s_mov_b32 s4, s6
47; UNPACKED-NEXT:    s_mov_b32 s5, s7
48; UNPACKED-NEXT:    s_mov_b32 s6, s8
49; UNPACKED-NEXT:    s_mov_b32 s7, s9
50; UNPACKED-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
51; UNPACKED-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16
52; UNPACKED-NEXT:    s_endpgm
53;
54; GFX81-LABEL: image_store_v2f16:
55; GFX81:       ; %bb.0:
56; GFX81-NEXT:    s_mov_b32 s0, s2
57; GFX81-NEXT:    s_mov_b32 s1, s3
58; GFX81-NEXT:    s_mov_b32 s2, s4
59; GFX81-NEXT:    s_mov_b32 s3, s5
60; GFX81-NEXT:    s_mov_b32 s4, s6
61; GFX81-NEXT:    s_mov_b32 s5, s7
62; GFX81-NEXT:    s_mov_b32 s6, s8
63; GFX81-NEXT:    s_mov_b32 s7, s9
64; GFX81-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16
65; GFX81-NEXT:    s_endpgm
66  call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
67  ret void
68}
69
; Three-component case: stores a <3 x half> value with immediate operand 7
; (dmask:0x7 in the expected output).
; Expected code per prefix:
;   - UNPACKED (tonga): the coordinates are copied to v[4:5], the data is
;     moved so it starts at v1, one shift right by 16 produces v2 from v1,
;     and v[1:3] is stored.
;   - GFX81 (gfx810): the first data register is rebuilt with a
;     shift-right/shift-left/v_or_b32_sdwa sequence, v3 is masked with 0xffff,
;     v4 is set to 0, and v[2:4] is stored.
; NOTE(review): the zeroed v4 looks like padding of the data operand to three
; registers for this target; confirm the reason before relying on it.
70define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <3 x half> %in) {
71; UNPACKED-LABEL: image_store_v3f16:
72; UNPACKED:       ; %bb.0:
73; UNPACKED-NEXT:    v_mov_b32_e32 v5, v1
74; UNPACKED-NEXT:    v_mov_b32_e32 v1, v2
75; UNPACKED-NEXT:    s_mov_b32 s0, s2
76; UNPACKED-NEXT:    s_mov_b32 s1, s3
77; UNPACKED-NEXT:    s_mov_b32 s2, s4
78; UNPACKED-NEXT:    s_mov_b32 s3, s5
79; UNPACKED-NEXT:    s_mov_b32 s4, s6
80; UNPACKED-NEXT:    s_mov_b32 s5, s7
81; UNPACKED-NEXT:    s_mov_b32 s6, s8
82; UNPACKED-NEXT:    s_mov_b32 s7, s9
83; UNPACKED-NEXT:    v_mov_b32_e32 v4, v0
84; UNPACKED-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
85; UNPACKED-NEXT:    image_store v[1:3], v[4:5], s[0:7] dmask:0x7 unorm d16
86; UNPACKED-NEXT:    s_endpgm
87;
88; GFX81-LABEL: image_store_v3f16:
89; GFX81:       ; %bb.0:
90; GFX81-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
91; GFX81-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
92; GFX81-NEXT:    s_mov_b32 s0, s2
93; GFX81-NEXT:    s_mov_b32 s1, s3
94; GFX81-NEXT:    s_mov_b32 s2, s4
95; GFX81-NEXT:    s_mov_b32 s3, s5
96; GFX81-NEXT:    s_mov_b32 s4, s6
97; GFX81-NEXT:    s_mov_b32 s5, s7
98; GFX81-NEXT:    s_mov_b32 s6, s8
99; GFX81-NEXT:    s_mov_b32 s7, s9
100; GFX81-NEXT:    v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
101; GFX81-NEXT:    v_and_b32_e32 v3, 0xffff, v3
102; GFX81-NEXT:    v_mov_b32_e32 v4, 0
103; GFX81-NEXT:    image_store v[2:4], v[0:1], s[0:7] dmask:0x7 unorm d16
104; GFX81-NEXT:    s_endpgm
105  call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
106  ret void
107}
108
; Four-component case: stores a <4 x half> value with immediate operand 15
; (dmask:0xf in the expected output).
; Expected code per prefix:
;   - UNPACKED (tonga): the coordinates are copied to v[5:6], the data is
;     moved so it starts at v1, two shifts right by 16 produce v2 and v4, and
;     v[1:4] is stored.
;   - GFX81 (gfx810): no VALU instructions are expected; v[2:5] is stored
;     directly.
; NOTE(review): only v2 and v3 carry the incoming <4 x half> here, so confirm
; that storing v[2:5] on GFX81 is the intended register width for this target.
109define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <4 x half> %in) {
110; UNPACKED-LABEL: image_store_v4f16:
111; UNPACKED:       ; %bb.0:
112; UNPACKED-NEXT:    v_mov_b32_e32 v6, v1
113; UNPACKED-NEXT:    v_mov_b32_e32 v1, v2
114; UNPACKED-NEXT:    s_mov_b32 s0, s2
115; UNPACKED-NEXT:    s_mov_b32 s1, s3
116; UNPACKED-NEXT:    s_mov_b32 s2, s4
117; UNPACKED-NEXT:    s_mov_b32 s3, s5
118; UNPACKED-NEXT:    s_mov_b32 s4, s6
119; UNPACKED-NEXT:    s_mov_b32 s5, s7
120; UNPACKED-NEXT:    s_mov_b32 s6, s8
121; UNPACKED-NEXT:    s_mov_b32 s7, s9
122; UNPACKED-NEXT:    v_mov_b32_e32 v5, v0
123; UNPACKED-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
124; UNPACKED-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
125; UNPACKED-NEXT:    image_store v[1:4], v[5:6], s[0:7] dmask:0xf unorm d16
126; UNPACKED-NEXT:    s_endpgm
127;
128; GFX81-LABEL: image_store_v4f16:
129; GFX81:       ; %bb.0:
130; GFX81-NEXT:    s_mov_b32 s0, s2
131; GFX81-NEXT:    s_mov_b32 s1, s3
132; GFX81-NEXT:    s_mov_b32 s2, s4
133; GFX81-NEXT:    s_mov_b32 s3, s5
134; GFX81-NEXT:    s_mov_b32 s4, s6
135; GFX81-NEXT:    s_mov_b32 s5, s7
136; GFX81-NEXT:    s_mov_b32 s6, s8
137; GFX81-NEXT:    s_mov_b32 s7, s9
138; GFX81-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0xf unorm d16
139; GFX81-NEXT:    s_endpgm
140  call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
141  ret void
142}
143
; Declarations of the 2D image-store intrinsic overloads called by the tests
; in this file, one per data type (half, <2 x half>, <3 x half>, <4 x half>).
; The second and the last two i32 operands are marked immarg, so callers must
; pass constants for them. All four share attribute group #0.
144declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
145declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
146declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
147declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
148
; Attribute group applied to every intrinsic declaration in this file.
149attributes #0 = { nounwind writeonly }
150