; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=PREGFX10-UNPACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX10-PACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX12-PACKED %s

; Code generation test for the llvm.amdgcn.raw.tbuffer.load intrinsics with
; half-precision (d16) result types of one to four elements. Every function
; below passes the same operands (offsets 0, immediate 22, trailing 0) and
; returns the last element of the loaded value, so the expected output shows
; which result register and which half of it holds that element.
;
; The immediate 22 is printed as
; format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] in the expected output
; for tonga, gfx810 and gfx900, and as format:[BUF_FMT_32_FLOAT] for gfx1010,
; gfx1100 and gfx1200. Only the tonga run requests -show-mc-encoding, which is
; why only its expectations carry encoding bytes.

; Scalar half result: a single d16 "x" load into v0, no extra move or shift.
define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_x:
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_x v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x00,0xb4,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_x:
; PREGFX10-PACKED: ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_x v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_x:
; GFX10-PACKED: ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT: tbuffer_load_format_d16_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_x:
; GFX11-PACKED: ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT: ; return to shader part epilog
;
; GFX12-PACKED-LABEL: tbuffer_load_d16_x:
; GFX12-PACKED: ; %bb.0: ; %main_body
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_x v0, off, s[0:3], null format:[BUF_FMT_32_FLOAT]
; GFX12-PACKED-NEXT: s_wait_loadcnt 0x0
; GFX12-PACKED-NEXT: ; return to shader part epilog
main_body:
  %data = call half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
  ret half %data
}

; <2 x half> result, element 1 returned. The tonga expectations load into
; v[0:1] and copy v1; the other targets load into v0 alone and shift it right
; by 16 to reach the upper half.
define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xy:
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_xy v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x80,0xb4,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xy:
; PREGFX10-PACKED: ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_xy v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; PREGFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_xy:
; GFX10-PACKED: ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT: tbuffer_load_format_d16_xy v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_xy:
; GFX11-PACKED: ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xy v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-PACKED-NEXT: ; return to shader part epilog
;
; GFX12-PACKED-LABEL: tbuffer_load_d16_xy:
; GFX12-PACKED: ; %bb.0: ; %main_body
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xy v0, off, s[0:3], null format:[BUF_FMT_32_FLOAT]
; GFX12-PACKED-NEXT: s_wait_loadcnt 0x0
; GFX12-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX12-PACKED-NEXT: ; return to shader part epilog
main_body:
  %data = call <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
  %elt = extractelement <2 x half> %data, i32 1
  ret half %elt
}

; <3 x half> result, element 2 returned. The tonga expectations load into
; v[0:2] and copy v2; the other targets load into v[0:1] and copy v1 with no
; shift.
define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyz:
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_xyz v[0:2], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x00,0xb5,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, v2 ; encoding: [0x02,0x03,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
; PREGFX10-PACKED: ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_xyz v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, v1
; PREGFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
; GFX10-PACKED: ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT: tbuffer_load_format_d16_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, v1
; GFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_xyz:
; GFX11-PACKED: ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, v1
; GFX11-PACKED-NEXT: ; return to shader part epilog
;
; GFX12-PACKED-LABEL: tbuffer_load_d16_xyz:
; GFX12-PACKED: ; %bb.0: ; %main_body
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xyz v[0:1], off, s[0:3], null format:[BUF_FMT_32_FLOAT]
; GFX12-PACKED-NEXT: s_wait_loadcnt 0x0
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, v1
; GFX12-PACKED-NEXT: ; return to shader part epilog
main_body:
  %data = call <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
  %elt = extractelement <3 x half> %data, i32 2
  ret half %elt
}

; <4 x half> result, element 3 returned. The tonga expectations load into
; v[0:3] and copy v3; the other targets load into v[0:1] and shift v1 right by
; 16 to reach the upper half.
define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyzw:
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x80,0xb5,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, v3 ; encoding: [0x03,0x03,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
; PREGFX10-PACKED: ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; PREGFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
; GFX10-PACKED: ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT: tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_xyzw:
; GFX11-PACKED: ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xyzw v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX11-PACKED-NEXT: ; return to shader part epilog
;
; GFX12-PACKED-LABEL: tbuffer_load_d16_xyzw:
; GFX12-PACKED: ; %bb.0: ; %main_body
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xyzw v[0:1], off, s[0:3], null format:[BUF_FMT_32_FLOAT]
; GFX12-PACKED-NEXT: s_wait_loadcnt 0x0
; GFX12-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX12-PACKED-NEXT: ; return to shader part epilog
main_body:
  %data = call <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 0, i32 0, i32 22, i32 0)
  %elt = extractelement <4 x half> %data, i32 3
  ret half %elt
}

; Intrinsic declarations, one per result type exercised above.
declare half @llvm.amdgcn.raw.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32)
declare <2 x half> @llvm.amdgcn.raw.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32)
declare <3 x half> @llvm.amdgcn.raw.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32)
declare <4 x half> @llvm.amdgcn.raw.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32)