; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=PREGFX10-UNPACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX10-PACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED %s

; Code generation checks for the llvm.amdgcn.struct.ptr.tbuffer.load intrinsics
; returning half and <2 x half> .. <4 x half> (d16 typed buffer loads).
;
; What the expected output below shows:
;  * Only the tonga invocation passes -show-mc-encoding, so only its checks
;    carry instruction encodings.
;  * On tonga the vector loads use one VGPR per component (v[0:1], v[0:2],
;    v[0:3]); on gfx810, gfx900, gfx1010 and gfx1100 they use one VGPR per two
;    components (v0 or v[0:1]).
;  * The fifth intrinsic operand (i32 22) is printed as
;    [BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] on tonga/gfx810/gfx900 and
;    as [BUF_FMT_32_FLOAT] on gfx1010/gfx1100.
;  * gfx1100 spells the mnemonic tbuffer_load_d16_format_* where the other
;    targets use tbuffer_load_format_d16_*.
;  * Every load is emitted with idxen and a zero in v0 as the VGPR address
;    operand (presumably the i32 0 that follows %rsrc is the struct index;
;    confirm against the intrinsic definition).
;
; The check lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Scalar half load. The loaded value is returned straight from v0, so no
; extraction code is expected on any target.
define amdgpu_ps half @tbuffer_load_d16_x(ptr addrspace(8) inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_x:
; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_x v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0x20,0xb4,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_x:
; PREGFX10-PACKED:       ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_x v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_x:
; GFX10-PACKED:       ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_x v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT:    ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_x:
; GFX11-PACKED:       ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_x v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT:    ; return to shader part epilog
main_body:
  %data = call half @llvm.amdgcn.struct.ptr.tbuffer.load.f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
  ret half %data
}

; <2 x half> load returning element 1. tonga loads into v[0:1] and copies v1
; to v0; the other targets load into v0 and shift it right by 16.
define amdgpu_ps half @tbuffer_load_d16_xy(ptr addrspace(8) inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xy:
; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_xy v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0xa0,0xb4,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xy:
; PREGFX10-PACKED:       ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_xy v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_xy:
; GFX10-PACKED:       ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_xy v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX10-PACKED-NEXT:    ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_xy:
; GFX11-PACKED:       ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_xy v0, v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX11-PACKED-NEXT:    ; return to shader part epilog
main_body:
  %data = call <2 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
  %elt = extractelement <2 x half> %data, i32 1
  ret half %elt
}

; <3 x half> load returning element 2. tonga loads into v[0:2] and copies v2
; to v0; the other targets load into v[0:1] and copy v1 to v0 without a shift.
define amdgpu_ps half @tbuffer_load_d16_xyz(ptr addrspace(8) inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyz:
; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_xyz v[0:2], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0x20,0xb5,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, v2 ; encoding: [0x02,0x03,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
; PREGFX10-PACKED:       ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyz v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, v1
; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
; GFX10-PACKED:       ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyz v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, v1
; GFX10-PACKED-NEXT:    ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_xyz:
; GFX11-PACKED:       ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_xyz v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, v1
; GFX11-PACKED-NEXT:    ; return to shader part epilog
main_body:
  %data = call <3 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
  %elt = extractelement <3 x half> %data, i32 2
  ret half %elt
}

; <4 x half> load returning element 3. tonga loads into v[0:3] and copies v3
; to v0; the other targets load into v[0:1] and shift v1 right by 16 into v0.
define amdgpu_ps half @tbuffer_load_d16_xyzw(ptr addrspace(8) inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyzw:
; PREGFX10-UNPACKED:       ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT:    tbuffer_load_format_d16_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen ; encoding: [0x00,0xa0,0xb5,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT:    s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT:    v_mov_b32_e32 v0, v3 ; encoding: [0x03,0x03,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT:    ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
; PREGFX10-PACKED:       ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
; PREGFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyzw v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen
; PREGFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
; PREGFX10-PACKED-NEXT:    ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
; GFX10-PACKED:       ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-PACKED-NEXT:    tbuffer_load_format_d16_xyzw v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX10-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
; GFX10-PACKED-NEXT:    ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_xyzw:
; GFX11-PACKED:       ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-PACKED-NEXT:    tbuffer_load_d16_format_xyzw v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen
; GFX11-PACKED-NEXT:    s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT:    v_lshrrev_b32_e32 v0, 16, v1
; GFX11-PACKED-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 22, i32 0)
  %elt = extractelement <4 x half> %data, i32 3
  ret half %elt
}

; Intrinsic declarations, one per result type used by the functions in this
; file.
declare half @llvm.amdgcn.struct.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32, i32, i32)
declare <2 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32, i32)
declare <3 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32, i32)
declare <4 x half> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32, i32)