; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED,GFX89 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX81,GFX89 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,PACKED,GFX89 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX12 %s

; Checks the image_load / image_load_mip / image_store / image_store_mip
; instructions emitted for the llvm.amdgcn.image.* intrinsics when the data
; type is half, <2 x half>, <3 x half> or <4 x half>.
;
; How the expected data-register width differs per run line, as encoded in the
; checks below:
;   * tonga run (UNPACKED prefix)  - one data VGPR per half component.
;   * gfx900 run (PACKED prefix)   - two half components per data VGPR.
;   * gfx810 run (GFX81 prefix)    - loads use the packed width, stores use
;                                    the one-VGPR-per-component width.
;   * gfx1010 / gfx1100 runs       - packed width, with an explicit dim operand.
;   * gfx1200 run                  - packed width, address operands printed as
;                                    a bracketed register list, no unorm.

; ---- Loads -----------------------------------------------------------------

; GCN-LABEL: {{^}}image_load_f16:
; GFX89: image_load v0, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}}
; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
; GFX12: image_load v0, [v0, v1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D d16
define amdgpu_ps half @image_load_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
main_body:
  %tex = call half @llvm.amdgcn.image.load.2d.f16.i32(i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret half %tex
}

; GCN-LABEL: {{^}}image_load_v2f16:
; UNPACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
; PACKED: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
; GFX81: image_load v0, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
; GFX10: image_load v0, v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
; GFX12: image_load v0, [v0, v1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D d16
define amdgpu_ps float @image_load_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
main_body:
  %tex = call <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  %r = bitcast <2 x half> %tex to float
  ret float %r
}

; The <3 x half> result is widened to <4 x half> so it can be returned as
; <2 x float>. The gfx810 expectation mirrors the packed width used by the
; gfx810 checks of the other load tests in this file.
; GCN-LABEL: {{^}}image_load_v3f16:
; UNPACKED: image_load v[0:2], v[0:1], s[0:7] dmask:0x7 unorm d16{{$}}
; PACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 unorm d16{{$}}
; GFX81: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 unorm d16{{$}}
; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
; GFX12: image_load v[0:1], [v0, v1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D d16
define amdgpu_ps <2 x float> @image_load_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
main_body:
  %tex = call <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  %ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %r = bitcast <4 x half> %ext to <2 x float>
  ret <2 x float> %r
}

; GCN-LABEL: {{^}}image_load_v4f16:
; UNPACKED: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
; PACKED: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
; GFX81: image_load v[0:1], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
; GFX10: image_load v[0:1], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}}
; GFX12: image_load v[0:1], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D d16
define amdgpu_ps <2 x float> @image_load_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  %r = bitcast <4 x half> %tex to <2 x float>
  ret <2 x float> %r
}

; GCN-LABEL: {{^}}image_load_mip_v4f16:
; UNPACKED: image_load_mip v[0:3], v[0:2], s[0:7] dmask:0xf unorm d16{{$}}
; PACKED: image_load_mip v[0:1], v[0:2], s[0:7] dmask:0xf unorm d16{{$}}
; GFX81: image_load_mip v[0:1], v[0:2], s[0:7] dmask:0xf unorm d16{{$}}
; GFX10: image_load_mip v[0:1], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}}
; GFX12: image_load_mip v[0:1], [v0, v1, v2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D d16
define amdgpu_ps <2 x float> @image_load_mip_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
main_body:
  %tex = call <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  %r = bitcast <4 x half> %tex to <2 x float>
  ret <2 x float> %r
}

; GCN-LABEL: {{^}}image_load_3d_v2f16:
; UNPACKED: image_load v[0:1], v[0:2], s[0:7] dmask:0x3 unorm d16{{$}}
; PACKED: image_load v0, v[0:2], s[0:7] dmask:0x3 unorm d16{{$}}
; GFX81: image_load v0, v[0:2], s[0:7] dmask:0x3 unorm d16{{$}}
; GFX10: image_load v0, v[0:2], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D unorm d16{{$}}
; GFX12: image_load v0, [v0, v1, v2], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_3D d16
define amdgpu_ps float @image_load_3d_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
main_body:
  %tex = call <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32 3, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  %x = bitcast <2 x half> %tex to float
  ret float %x
}


; GCN-LABEL: {{^}}image_load_3d_v3f16:
; UNPACKED: image_load v[0:2], v[0:2], s[0:7] dmask:0x7 unorm d16
; PACKED: image_load v[0:1], v[0:2], s[0:7] dmask:0x7 unorm d16
; GFX81: image_load v[0:1], v[0:2], s[0:7] dmask:0x7 unorm d16
; GFX10: image_load v[0:1], v[0:2], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D unorm d16{{$}}
; GFX12: image_load v[0:1], [v0, v1, v2], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_3D d16
define amdgpu_ps <2 x float> @image_load_3d_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
main_body:
  %tex = call <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32 7, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  %ext = shufflevector <3 x half> %tex, <3 x half> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %res = bitcast <4 x half> %ext to <2 x float>
  ret <2 x float> %res
}

; ---- Stores ----------------------------------------------------------------

; GCN-LABEL: {{^}}image_store_f16:
; GFX89: image_store v2, v[0:1], s[0:7] dmask:0x1 unorm d16{{$}}
; GFX10: image_store v2, v[0:1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
; GFX12: image_store v2, [v0, v1], s[0:7] dmask:0x1 dim:SQ_RSRC_IMG_2D d16
define amdgpu_ps void @image_store_f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, half %data) {
main_body:
  call void @llvm.amdgcn.image.store.2d.f16.i32(half %data, i32 1, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; The tonga run additionally expects a shift and a mask before the store,
; which split the incoming 32-bit value into its two half components.
; GCN-LABEL: {{^}}image_store_v2f16:
; UNPACKED: v_lshrrev_b32_e32
; UNPACKED: v_and_b32_e32
; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
; PACKED: image_store v2, v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
; GFX81: image_store v[2:3], v[0:1], s[0:7] dmask:0x3 unorm d16{{$}}
; GFX10: image_store v2, v[0:1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
; GFX12: image_store v2, [v0, v1], s[0:7] dmask:0x3 dim:SQ_RSRC_IMG_2D d16
define amdgpu_ps void @image_store_v2f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, float %in) {
main_body:
  %data = bitcast float %in to <2 x half>
  call void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half> %data, i32 3, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; The <3 x half> store data is the first three lanes of the <4 x half> view
; of the <2 x float> argument.
; GCN-LABEL: {{^}}image_store_v3f16:
; UNPACKED: image_store v[2:4], v[0:1], s[0:7] dmask:0x7 unorm d16
; PACKED: image_store v[2:3], v[0:1], s[0:7] dmask:0x7 unorm d16
; GFX81: image_store v[2:4], v[0:1], s[0:7] dmask:0x7 unorm d16
; GFX10: image_store v[2:3], v[0:1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D unorm d16{{$}}
; GFX12: image_store v[2:3], [v0, v1], s[0:7] dmask:0x7 dim:SQ_RSRC_IMG_2D d16
define amdgpu_ps void @image_store_v3f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
main_body:
  %r = bitcast <2 x float> %in to <4 x half>
  %data = shufflevector <4 x half> %r, <4 x half> undef, <3 x i32> <i32 0, i32 1, i32 2>
  call void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half> %data, i32 7, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}image_store_v4f16:
; UNPACKED: v_lshrrev_b32_e32
; UNPACKED: v_and_b32_e32
; UNPACKED: v_lshrrev_b32_e32
; UNPACKED: v_and_b32_e32
; UNPACKED: image_store v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
; PACKED: image_store v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
; GFX81: image_store v[2:5], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
; GFX10: image_store v[2:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm d16{{$}}
; GFX12: image_store v[2:3], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D d16
define amdgpu_ps void @image_store_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %t, <2 x float> %in) {
main_body:
  %data = bitcast <2 x float> %in to <4 x half>
  call void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half> %data, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}image_store_mip_1d_v4f16:
; UNPACKED: v_lshrrev_b32_e32
; UNPACKED: v_and_b32_e32
; UNPACKED: v_lshrrev_b32_e32
; UNPACKED: v_and_b32_e32
; UNPACKED: image_store_mip v[{{[0-9:]+}}], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
; PACKED: image_store_mip v[2:3], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
; GFX81: image_store_mip v[2:5], v[0:1], s[0:7] dmask:0xf unorm d16{{$}}
; GFX10: image_store_mip v[2:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D unorm d16{{$}}
; GFX12: image_store_mip v[2:3], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_1D d16
define amdgpu_ps void @image_store_mip_1d_v4f16(<8 x i32> inreg %rsrc, i32 %s, i32 %mip, <2 x float> %in) {
main_body:
  %data = bitcast <2 x float> %in to <4 x half>
  call void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half> %data, i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; ---- Intrinsic declarations ------------------------------------------------

declare half @llvm.amdgcn.image.load.2d.f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <2 x half> @llvm.amdgcn.image.load.2d.v2f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <3 x half> @llvm.amdgcn.image.load.2d.v3f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x half> @llvm.amdgcn.image.load.mip.2d.v4f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <2 x half> @llvm.amdgcn.image.load.3d.v2f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <3 x half> @llvm.amdgcn.image.load.3d.v3f16.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1

; The two 3d store declarations below are not called by any function above.
declare void @llvm.amdgcn.image.store.2d.f16.i32(half, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2d.v2f16.i32(<2 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2d.v3f16.i32(<3 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.1d.v4f16.i32(<4 x half>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.3d.v2f16.i32(<2 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.3d.v3f16.i32(<3 x half>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0

; Attribute group #2 is defined but not referenced in this file.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }