; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding | FileCheck -enable-var-scope -check-prefixes=PREGFX10-UNPACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=PREGFX10-PACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX10-PACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -enable-var-scope -check-prefixes=GFX11-PACKED %s

; Codegen test for the llvm.amdgcn.raw.ptr.tbuffer.load intrinsic with
; half-precision results (f16, v2f16, v3f16, v4f16).
;
; Every function below passes the same immediate operands (0, 0, 22, 0) and
; returns the LAST component of the loaded value, so the expected assembly
; shows which register (and which 16-bit half of it) that component ends up in
; for each target.
;
; What the expected output demonstrates:
;  * tonga: the load writes one result register per component (v[0:1],
;    v[0:2], v[0:3]) and the last component is copied out with v_mov_b32.
;  * gfx810 / gfx900 / gfx1010 / gfx1100: the load writes two components per
;    result register; an odd-indexed component is extracted with a 16-bit
;    right shift, an even-indexed one with a plain register move.
;  * tonga / gfx810 / gfx900 print the format operand as
;    [BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM], while gfx1010 / gfx1100
;    print it as [BUF_FMT_32_FLOAT].
;  * gfx1100 spells the mnemonic tbuffer_load_d16_format_* where the other
;    targets use tbuffer_load_format_d16_*.
;  * Only the tonga configuration is invoked with -show-mc-encoding, which is
;    why only its lines carry "encoding" annotations.
;
; NOTE(review): the i32 22 operand appears to be the format selector (it is the
; only non-zero immediate and the printed format field is the only thing that
; varies with it) - confirm against the intrinsic definition.

; Scalar f16 load: a single-component d16_x load whose result is returned
; directly from v0 after waiting for the memory operation.
define amdgpu_ps half @tbuffer_load_d16_x(ptr addrspace(8) inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_x:
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_x v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x00,0xb4,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_x:
; PREGFX10-PACKED: ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_x v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_x:
; GFX10-PACKED: ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT: tbuffer_load_format_d16_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_x:
; GFX11-PACKED: ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT: ; return to shader part epilog
main_body:
  %data = call half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 22, i32 0)
  ret half %data
}

; <2 x half> load returning element 1. On tonga the element is expected in its
; own register (v1); on the other targets it is expected in the upper 16 bits
; of v0 and is shifted down.
define amdgpu_ps half @tbuffer_load_d16_xy(ptr addrspace(8) inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xy:
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_xy v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x80,0xb4,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, v1 ; encoding: [0x01,0x03,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xy:
; PREGFX10-PACKED: ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_xy v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; PREGFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_xy:
; GFX10-PACKED: ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT: tbuffer_load_format_d16_xy v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_xy:
; GFX11-PACKED: ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xy v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11-PACKED-NEXT: ; return to shader part epilog
main_body:
  %data = call <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 22, i32 0)
  %elt = extractelement <2 x half> %data, i32 1
  ret half %elt
}

; <3 x half> load returning element 2. On tonga the element is expected in v2;
; on the other targets it is expected in v1 and is copied to v0 without a
; shift.
define amdgpu_ps half @tbuffer_load_d16_xyz(ptr addrspace(8) inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyz:
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_xyz v[0:2], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x00,0xb5,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, v2 ; encoding: [0x02,0x03,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
; PREGFX10-PACKED: ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_xyz v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT: v_mov_b32_e32 v0, v1
; PREGFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_xyz:
; GFX10-PACKED: ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT: tbuffer_load_format_d16_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT: v_mov_b32_e32 v0, v1
; GFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_xyz:
; GFX11-PACKED: ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xyz v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT: v_mov_b32_e32 v0, v1
; GFX11-PACKED-NEXT: ; return to shader part epilog
main_body:
  %data = call <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 22, i32 0)
  %elt = extractelement <3 x half> %data, i32 2
  ret half %elt
}

; <4 x half> load returning element 3. On tonga the element is expected in v3;
; on the other targets it is expected in the upper 16 bits of v1 and is
; shifted down into v0.
define amdgpu_ps half @tbuffer_load_d16_xyzw(ptr addrspace(8) inreg %rsrc) {
; PREGFX10-UNPACKED-LABEL: tbuffer_load_d16_xyzw:
; PREGFX10-UNPACKED: ; %bb.0: ; %main_body
; PREGFX10-UNPACKED-NEXT: tbuffer_load_format_d16_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] ; encoding: [0x00,0x80,0xb5,0xe8,0x00,0x00,0x00,0x80]
; PREGFX10-UNPACKED-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf]
; PREGFX10-UNPACKED-NEXT: v_mov_b32_e32 v0, v3 ; encoding: [0x03,0x03,0x00,0x7e]
; PREGFX10-UNPACKED-NEXT: ; return to shader part epilog
;
; PREGFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
; PREGFX10-PACKED: ; %bb.0: ; %main_body
; PREGFX10-PACKED-NEXT: tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM]
; PREGFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; PREGFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; PREGFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX10-PACKED-LABEL: tbuffer_load_d16_xyzw:
; GFX10-PACKED: ; %bb.0: ; %main_body
; GFX10-PACKED-NEXT: tbuffer_load_format_d16_xyzw v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX10-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX10-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX10-PACKED-NEXT: ; return to shader part epilog
;
; GFX11-PACKED-LABEL: tbuffer_load_d16_xyzw:
; GFX11-PACKED: ; %bb.0: ; %main_body
; GFX11-PACKED-NEXT: tbuffer_load_d16_format_xyzw v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
; GFX11-PACKED-NEXT: s_waitcnt vmcnt(0)
; GFX11-PACKED-NEXT: v_lshrrev_b32_e32 v0, 16, v1
; GFX11-PACKED-NEXT: ; return to shader part epilog
main_body:
  %data = call <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 22, i32 0)
  %elt = extractelement <4 x half> %data, i32 3
  ret half %elt
}

; Declarations of the intrinsic overloads exercised above, one per result
; type. All share the same operand list: a ptr addrspace(8) followed by four
; i32 values.
declare half @llvm.amdgcn.raw.ptr.tbuffer.load.f16(ptr addrspace(8), i32, i32, i32, i32)
declare <2 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v2f16(ptr addrspace(8), i32, i32, i32, i32)
declare <3 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v3f16(ptr addrspace(8), i32, i32, i32, i32)
declare <4 x half> @llvm.amdgcn.raw.ptr.tbuffer.load.v4f16(ptr addrspace(8), i32, i32, i32, i32)