; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -o - %s | FileCheck -check-prefix=UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx810 -o - %s | FileCheck -check-prefix=GFX81 %s
; FIXME: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - %s | FileCheck -check-prefix=GFX9 %s
; FIXME: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
; FIXME: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s | FileCheck -check-prefix=GFX11 %s
; FIXME: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -o - %s | FileCheck -check-prefix=GFX12 %s

; Exercises the llvm.amdgcn.image.store.2d.* intrinsics with half-typed data
; (scalar, <2 x half>, <3 x half>, <4 x half>) through llc -global-isel.
; Only the tonga and gfx810 command lines above are active; the gfx900,
; gfx1010, gfx1100 and gfx1200 command lines are parked under FIXME and this
; file carries no check lines for them.
;
; In every function the tonga and gfx810 checks expect the inreg resource
; descriptor to be copied from s[2:9] down to s[0:7] before the store.

; Scalar half store with a dmask immediate of 1. Both active configurations
; expect a single data register (v2) and the coordinates in v[0:1].
define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) {
; UNPACKED-LABEL: image_store_f16:
; UNPACKED:       ; %bb.0:
; UNPACKED-NEXT:    s_mov_b32 s0, s2
; UNPACKED-NEXT:    s_mov_b32 s1, s3
; UNPACKED-NEXT:    s_mov_b32 s2, s4
; UNPACKED-NEXT:    s_mov_b32 s3, s5
; UNPACKED-NEXT:    s_mov_b32 s4, s6
; UNPACKED-NEXT:    s_mov_b32 s5, s7
; UNPACKED-NEXT:    s_mov_b32 s6, s8
; UNPACKED-NEXT:    s_mov_b32 s7, s9
; UNPACKED-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
; UNPACKED-NEXT:    s_endpgm
;
; GFX81-LABEL: image_store_f16:
; GFX81:       ; %bb.0:
; GFX81-NEXT:    s_mov_b32 s0, s2
; GFX81-NEXT:    s_mov_b32 s1, s3
; GFX81-NEXT:    s_mov_b32 s2, s4
; GFX81-NEXT:    s_mov_b32 s3, s5
; GFX81-NEXT:    s_mov_b32 s4, s6
; GFX81-NEXT:    s_mov_b32 s5, s7
; GFX81-NEXT:    s_mov_b32 s6, s8
; GFX81-NEXT:    s_mov_b32 s7, s9
; GFX81-NEXT:    image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16
; GFX81-NEXT:    s_endpgm
  call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; <2 x half> store with a dmask immediate of 3.
; tonga checks: the upper 16 bits of v2 are shifted down into v3, so the
; store reads one half per register from v[2:3] (presumably the unpacked d16
; layout the check prefix is named after -- confirm against the target docs).
; gfx810 checks: no shift is expected; the store still names v[2:3] as data.
define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x half> %in) {
; UNPACKED-LABEL: image_store_v2f16:
; UNPACKED:       ; %bb.0:
; UNPACKED-NEXT:    s_mov_b32 s0, s2
; UNPACKED-NEXT:    s_mov_b32 s1, s3
; UNPACKED-NEXT:    s_mov_b32 s2, s4
; UNPACKED-NEXT:    s_mov_b32 s3, s5
; UNPACKED-NEXT:    s_mov_b32 s4, s6
; UNPACKED-NEXT:    s_mov_b32 s5, s7
; UNPACKED-NEXT:    s_mov_b32 s6, s8
; UNPACKED-NEXT:    s_mov_b32 s7, s9
; UNPACKED-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; UNPACKED-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16
; UNPACKED-NEXT:    s_endpgm
;
; GFX81-LABEL: image_store_v2f16:
; GFX81:       ; %bb.0:
; GFX81-NEXT:    s_mov_b32 s0, s2
; GFX81-NEXT:    s_mov_b32 s1, s3
; GFX81-NEXT:    s_mov_b32 s2, s4
; GFX81-NEXT:    s_mov_b32 s3, s5
; GFX81-NEXT:    s_mov_b32 s4, s6
; GFX81-NEXT:    s_mov_b32 s5, s7
; GFX81-NEXT:    s_mov_b32 s6, s8
; GFX81-NEXT:    s_mov_b32 s7, s9
; GFX81-NEXT:    image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16
; GFX81-NEXT:    s_endpgm
  call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %in, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; <3 x half> store with a dmask immediate of 7.
; tonga checks: the coordinates are moved to v[4:5], the data is placed in
; v[1:3], and v2 receives the upper 16 bits of v1.
; gfx810 checks: v2 is rebuilt from its own low and high halves (shift right,
; shift left, then an SDWA or), v3 is masked to its low 16 bits, v4 is set
; to 0, and the store reads v[2:4].
define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <3 x half> %in) {
; UNPACKED-LABEL: image_store_v3f16:
; UNPACKED:       ; %bb.0:
; UNPACKED-NEXT:    v_mov_b32_e32 v5, v1
; UNPACKED-NEXT:    v_mov_b32_e32 v1, v2
; UNPACKED-NEXT:    s_mov_b32 s0, s2
; UNPACKED-NEXT:    s_mov_b32 s1, s3
; UNPACKED-NEXT:    s_mov_b32 s2, s4
; UNPACKED-NEXT:    s_mov_b32 s3, s5
; UNPACKED-NEXT:    s_mov_b32 s4, s6
; UNPACKED-NEXT:    s_mov_b32 s5, s7
; UNPACKED-NEXT:    s_mov_b32 s6, s8
; UNPACKED-NEXT:    s_mov_b32 s7, s9
; UNPACKED-NEXT:    v_mov_b32_e32 v4, v0
; UNPACKED-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; UNPACKED-NEXT:    image_store v[1:3], v[4:5], s[0:7] dmask:0x7 unorm d16
; UNPACKED-NEXT:    s_endpgm
;
; GFX81-LABEL: image_store_v3f16:
; GFX81:       ; %bb.0:
; GFX81-NEXT:    v_lshrrev_b32_e32 v4, 16, v2
; GFX81-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX81-NEXT:    s_mov_b32 s0, s2
; GFX81-NEXT:    s_mov_b32 s1, s3
; GFX81-NEXT:    s_mov_b32 s2, s4
; GFX81-NEXT:    s_mov_b32 s3, s5
; GFX81-NEXT:    s_mov_b32 s4, s6
; GFX81-NEXT:    s_mov_b32 s5, s7
; GFX81-NEXT:    s_mov_b32 s6, s8
; GFX81-NEXT:    s_mov_b32 s7, s9
; GFX81-NEXT:    v_or_b32_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX81-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX81-NEXT:    v_mov_b32_e32 v4, 0
; GFX81-NEXT:    image_store v[2:4], v[0:1], s[0:7] dmask:0x7 unorm d16
; GFX81-NEXT:    s_endpgm
  call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %in, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; <4 x half> store with a dmask immediate of 15.
; tonga checks: the coordinates are moved to v[5:6], the data is placed in
; v[1:4], and v2 / v4 receive the upper 16 bits of v1 / v3 respectively.
; gfx810 checks: the store reads v[2:5]; no instruction writing v4 or v5 is
; expected before it.
define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <4 x half> %in) {
; UNPACKED-LABEL: image_store_v4f16:
; UNPACKED:       ; %bb.0:
; UNPACKED-NEXT:    v_mov_b32_e32 v6, v1
; UNPACKED-NEXT:    v_mov_b32_e32 v1, v2
; UNPACKED-NEXT:    s_mov_b32 s0, s2
; UNPACKED-NEXT:    s_mov_b32 s1, s3
; UNPACKED-NEXT:    s_mov_b32 s2, s4
; UNPACKED-NEXT:    s_mov_b32 s3, s5
; UNPACKED-NEXT:    s_mov_b32 s4, s6
; UNPACKED-NEXT:    s_mov_b32 s5, s7
; UNPACKED-NEXT:    s_mov_b32 s6, s8
; UNPACKED-NEXT:    s_mov_b32 s7, s9
; UNPACKED-NEXT:    v_mov_b32_e32 v5, v0
; UNPACKED-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; UNPACKED-NEXT:    v_lshrrev_b32_e32 v4, 16, v3
; UNPACKED-NEXT:    image_store v[1:4], v[5:6], s[0:7] dmask:0xf unorm d16
; UNPACKED-NEXT:    s_endpgm
;
; GFX81-LABEL: image_store_v4f16:
; GFX81:       ; %bb.0:
; GFX81-NEXT:    s_mov_b32 s0, s2
; GFX81-NEXT:    s_mov_b32 s1, s3
; GFX81-NEXT:    s_mov_b32 s2, s4
; GFX81-NEXT:    s_mov_b32 s3, s5
; GFX81-NEXT:    s_mov_b32 s4, s6
; GFX81-NEXT:    s_mov_b32 s5, s7
; GFX81-NEXT:    s_mov_b32 s6, s8
; GFX81-NEXT:    s_mov_b32 s7, s9
; GFX81-NEXT:    image_store v[2:5], v[0:1], s[0:7] dmask:0xf unorm d16
; GFX81-NEXT:    s_endpgm
  call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %in, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; Intrinsic declarations used above. The second operand and the last two
; operands of each are marked immarg; all four share attribute group #0.
declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind writeonly }