; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=PREGFX10-UNPACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX10-PACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX12-PACKED %s

; Purpose of this file: code generation checks for the
; llvm.amdgcn.struct.tbuffer.load intrinsic family when the result type is
; half, <2 x half>, <3 x half> or <4 x half>.
;
; The invocations above compile the same IR for six processors. Only the tonga
; invocation passes -show-mc-encoding, so only its expected lines carry
; "; encoding: [...]" suffixes. gfx810 and gfx900 share one check prefix.
;
; Every call below passes %rsrc followed by the constants 0, 0, 0, 22, 0.
; NOTE(review): these are presumably vindex, voffset, soffset, format and an
; auxiliary/cache-policy word -- confirm against the intrinsic definition.
; The expected assembly prints the format as
; [BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] for tonga/gfx810/gfx900 and
; as [BUF_FMT_32_FLOAT] for gfx1010/gfx1100/gfx1200.
;
; Other differences visible in the expected output:
;   * gfx1100 and gfx1200 spell the mnemonic tbuffer_load_d16_format_*, the
;     older targets spell it tbuffer_load_format_d16_*.
;   * gfx1200 prints "null" where the other targets print the literal 0 after
;     the resource registers, and waits with "s_wait_loadcnt 0x0" instead of
;     "s_waitcnt vmcnt(0)".
;
; The expected-output comments inside each function are generated; regenerate
; them with utils/update_llc_test_checks.py rather than editing them by hand.

; Scalar case: loads a single half and returns it unchanged. All targets are
; expected to load into v0 and return it with no extra move or shift.
define amdgpu_ps half @tbuffer_load_d16_x(<4 x i32> inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_x:
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_x v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0x20,0xb4,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_x:
; PREGFX10-PACKED: ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_x v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_x:
; GFX10-PACKED: ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
; GFX10-PACKED-NEXT: tbuffer_load_format_d16_x v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_x:
; GFX11-PACKED: ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, 0
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_x v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT: ; return to shader part epilog
;
; GFX12-PACKED-LABEL: tbuffer_load_d16_x:
; GFX12-PACKED: ; %bb.0: ; %main_body
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_x v0, v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
; GFX12-PACKED-NEXT: s_wait_loadcnt 0x0
; GFX12-PACKED-NEXT: ; return to shader part epilog
main_body:
  %data = call half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
  ret half %data
}

; Two-element case: loads <2 x half> and returns element 1.
; Expected shape: tonga loads into v[0:1] and copies v1 into v0; the other
; targets load into the single register v0 and shift it right by 16 bits.
define amdgpu_ps half @tbuffer_load_d16_xy(<4 x i32> inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xy:
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_xy v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0xa0,0xb4,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xy:
; PREGFX10-PACKED: ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_xy v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; PREGFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_xy:
; GFX10-PACKED: ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
; GFX10-PACKED-NEXT: tbuffer_load_format_d16_xy v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_xy:
; GFX11-PACKED: ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, 0
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xy v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-PACKED-NEXT: ; return to shader part epilog
;
; GFX12-PACKED-LABEL: tbuffer_load_d16_xy:
; GFX12-PACKED: ; %bb.0: ; %main_body
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xy v0, v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
; GFX12-PACKED-NEXT: s_wait_loadcnt 0x0
; GFX12-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX12-PACKED-NEXT: ; return to shader part epilog
main_body:
  %data = call <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
  %elt = extractelement <2 x half> %data, i32 1
  ret half %elt
}

; Three-element case: loads <3 x half> and returns element 2.
; Expected shape: tonga loads into v[0:2] and copies v2 into v0; the other
; targets load into v[0:1] and copy v1 into v0 without a shift.
define amdgpu_ps half @tbuffer_load_d16_xyz(<4 x i32> inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyz:
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_xyz v[0:2], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0x20,0xb5,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, v2 ; encoding: [0x02,0x03,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
; PREGFX10-PACKED: ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_xyz v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, v1
; PREGFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
; GFX10-PACKED: ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
; GFX10-PACKED-NEXT: tbuffer_load_format_d16_xyz v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, v1
; GFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_xyz:
; GFX11-PACKED: ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, 0
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xyz v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, v1
; GFX11-PACKED-NEXT: ; return to shader part epilog
;
; GFX12-PACKED-LABEL: tbuffer_load_d16_xyz:
; GFX12-PACKED: ; %bb.0: ; %main_body
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xyz v[0:1], v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
; GFX12-PACKED-NEXT: s_wait_loadcnt 0x0
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, v1
; GFX12-PACKED-NEXT: ; return to shader part epilog
main_body:
  %data = call <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
  %elt = extractelement <3 x half> %data, i32 2
  ret half %elt
}

; Four-element case: loads <4 x half> and returns element 3.
; Expected shape: tonga loads into v[0:3] and copies v3 into v0; the other
; targets load into v[0:1] and shift v1 right by 16 bits into v0.
define amdgpu_ps half @tbuffer_load_d16_xyzw(<4 x i32> inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyzw:
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0xa0,0xb5,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, v3 ; encoding: [0x03,0x03,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
; PREGFX10-PACKED: ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_xyzw v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; PREGFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
; GFX10-PACKED: ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, 0
; GFX10-PACKED-NEXT: tbuffer_load_format_d16_xyzw v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_xyzw:
; GFX11-PACKED: ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, 0
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xyzw v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX11-PACKED-NEXT: ; return to shader part epilog
;
; GFX12-PACKED-LABEL: tbuffer_load_d16_xyzw:
; GFX12-PACKED: ; %bb.0: ; %main_body
; GFX12-PACKED-NEXT: v_mov_b32_e32 v0, 0
; GFX12-PACKED-NEXT: tbuffer_load_d16_format_xyzw v[0:1], v0, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen
; GFX12-PACKED-NEXT: s_wait_loadcnt 0x0
; GFX12-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX12-PACKED-NEXT: ; return to shader part epilog
main_body:
  %data = call <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
  %elt = extractelement <4 x half> %data, i32 3
  ret half %elt
}

; Intrinsic declarations, one per result type used above. Each takes a
; <4 x i32> followed by five i32 operands, matching the call sites.
declare half @llvm.amdgcn.struct.tbuffer.load.f16(<4 x i32>, i32, i32, i32, i32, i32)
declare <2 x half> @llvm.amdgcn.struct.tbuffer.load.v2f16(<4 x i32>, i32, i32, i32, i32, i32)
declare <3 x half> @llvm.amdgcn.struct.tbuffer.load.v3f16(<4 x i32>, i32, i32, i32, i32, i32)
declare <4 x half> @llvm.amdgcn.struct.tbuffer.load.v4f16(<4 x i32>, i32, i32, i32, i32, i32)