; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GFX10 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefixes=GFX11 %s

; Codegen test for the llvm.amdgcn.struct.ptr.tbuffer.load.* intrinsics.
;
; Every call below passes, in order: the ptr addrspace(8) resource, a vindex
; value, a voffset value, an soffset value, an immediate format code and an
; immediate auxiliary value. In the expected assembly the format code is
; printed either symbolically or as a raw number depending on the target, and
; the auxiliary values 1, 2 and 5 show up as the glc, slc and "glc dlc"
; modifiers (5 is printed as plain glc for the verde and tonga runs).
;
; The assertion comments are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Zero vindex/voffset/soffset. Varies the format code and the auxiliary value
; (0, 1, 2, 5) and covers both the v4i32 and the v4f32 result types.
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>, <4 x float>} @tbuffer_load(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: tbuffer_load:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v12, 0
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen glc
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen slc
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[12:15], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen glc
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v16, 0
; GFX10-NEXT:    s_clause 0x3
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v16, s[0:3], 0 format:78 idxen
; GFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], v16, s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen glc
; GFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], v16, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen slc
; GFX10-NEXT:    tbuffer_load_format_xyzw v[12:15], v16, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v12, 0
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v12, s[0:3], 0 format:78 idxen
; GFX11-NEXT:    tbuffer_load_format_xyzw v[4:7], v12, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen glc
; GFX11-NEXT:    tbuffer_load_format_xyzw v[8:11], v12, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen slc
; GFX11-NEXT:    tbuffer_load_format_xyzw v[12:15], v12, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %vdata = call <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 78, i32 0)
  %vdata_glc = call <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 63, i32 1)
  %vdata_slc = call <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 22, i32 2)
  %vdata_f32 = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 22, i32 5)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
  %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
  ; The aggregate seed is poison rather than undef; all four fields are
  ; overwritten below, so the seed never reaches the returned value.
  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %vdata.f, 0
  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
  %r3 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r2, <4 x float> %vdata_f32, 3
  ret {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r3
}

; Constant voffset 42: expected to land in the instruction's offset:42 field.
define amdgpu_vs <4 x float> @tbuffer_load_immoffs(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: tbuffer_load_immoffs:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:42
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_immoffs:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen offset:42
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_immoffs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen offset:42
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %vdata = call <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8) %0, i32 0, i32 42, i32 0, i32 78, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  ret <4 x float> %vdata.f
}

; Constant voffset 4095 combined with constant soffset 61, then small constant
; voffsets (73 and 1) combined with a scalar-register soffset (%soffs, seen as
; s4 in the expected output). All three calls use auxiliary value 0.
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(ptr addrspace(8) inreg, i32 inreg %soffs) {
; PREGFX10-LABEL: tbuffer_load_immoffs_large:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    v_mov_b32_e32 v8, 0
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v8, s[0:3], 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] idxen offset:4095
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], v8, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] idxen offset:73
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], v8, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:1
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_immoffs_large:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v12, 0
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v12, s[0:3], 61 format:[BUF_FMT_10_10_10_2_SSCALED] idxen offset:4095
; GFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], v12, s[0:3], s4 format:[BUF_FMT_32_32_UINT] idxen offset:73
; GFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], v12, s[0:3], s4 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_immoffs_large:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v8, 0
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v8, s[0:3], 61 format:[BUF_FMT_8_8_8_8_SINT] idxen offset:4095
; GFX11-NEXT:    tbuffer_load_format_xyzw v[4:7], v8, s[0:3], s4 format:[BUF_FMT_32_32_32_32_SINT] idxen offset:73
; GFX11-NEXT:    tbuffer_load_format_xyzw v[8:11], v8, s[0:3], s4 format:77 idxen offset:1
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
  %vdata = call <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8) %0, i32 0, i32 4095, i32 61, i32 47, i32 0)
  %vdata_glc = call <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8) %0, i32 0, i32 73, i32 %soffs, i32 62, i32 0)
  %vdata_slc = call <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8) %0, i32 0, i32 1, i32 %soffs, i32 77, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
  %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
  ; The aggregate seed is poison rather than undef; all three fields are
  ; overwritten below, so the seed never reaches the returned value.
  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %vdata.f, 0
  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
  ret {<4 x float>, <4 x float>, <4 x float>} %r2
}

; Non-constant vindex: the incoming VGPR is expected to be used directly as
; the idxen operand.
define amdgpu_vs <4 x float> @tbuffer_load_idx(ptr addrspace(8) inreg, i32 %vindex) {
; PREGFX10-LABEL: tbuffer_load_idx:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_idx:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_idx:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %vdata = call <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8) %0, i32 %vindex, i32 0, i32 0, i32 78, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  ret <4 x float> %vdata.f
}

; Non-constant voffset with a constant-zero vindex: the expected output builds
; a two-register operand (zero in v0, %voffs in v1) and uses idxen offen.
define amdgpu_vs <4 x float> @tbuffer_load_ofs(ptr addrspace(8) inreg, i32 %voffs) {
; PREGFX10-LABEL: tbuffer_load_ofs:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    s_mov_b32 s4, 0
; PREGFX10-NEXT:    v_mov_b32_e32 v1, v0
; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offen
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_ofs:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s4, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, v0
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_ofs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %vdata = call <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8) %0, i32 0, i32 %voffs, i32 0, i32 78, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  ret <4 x float> %vdata.f
}

; voffset = %voffs + 52: the constant part is expected in offset:52 while
; %voffs stays in the register operand.
define amdgpu_vs <4 x float> @tbuffer_load_ofs_imm(ptr addrspace(8) inreg, i32 %voffs) {
; PREGFX10-LABEL: tbuffer_load_ofs_imm:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    s_mov_b32 s4, 0
; PREGFX10-NEXT:    v_mov_b32_e32 v1, v0
; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offen offset:52
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_ofs_imm:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s4, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, v0
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen offset:52
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_ofs_imm:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen offset:52
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %ofs = add i32 %voffs, 52
  %vdata = call <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8) %0, i32 0, i32 %ofs, i32 0, i32 78, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  ret <4 x float> %vdata.f
}

; Both vindex and voffset are non-constant: the incoming v[0:1] pair is
; expected to be used as-is with idxen offen.
define amdgpu_vs <4 x float> @tbuffer_load_both(ptr addrspace(8) inreg, i32 %vindex, i32 %voffs) {
; PREGFX10-LABEL: tbuffer_load_both:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offen
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_both:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_both:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %vdata = call <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8) %0, i32 %vindex, i32 %voffs, i32 0, i32 78, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  ret <4 x float> %vdata.f
}

; A <2 x i32> result is expected to select tbuffer_load_format_xy.
define amdgpu_vs <2 x float> @buffer_load_xy(ptr addrspace(8) inreg %rsrc) {
; PREGFX10-LABEL: buffer_load_xy:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0
; PREGFX10-NEXT:    tbuffer_load_format_xy v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_xy:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    tbuffer_load_format_xy v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_xy:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    tbuffer_load_format_xy v[0:1], v0, s[0:3], 0 format:77 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
  %vdata = call <2 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v2i32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 77, i32 0)
  %vdata.f = bitcast <2 x i32> %vdata to <2 x float>
  ret <2 x float> %vdata.f
}

; A scalar i32 result is expected to select tbuffer_load_format_x.
define amdgpu_vs float @buffer_load_x(ptr addrspace(8) inreg %rsrc) {
; PREGFX10-LABEL: buffer_load_x:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0
; PREGFX10-NEXT:    tbuffer_load_format_x v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_x:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    tbuffer_load_format_x v0, v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_x:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    tbuffer_load_format_x v0, v0, s[0:3], 0 format:77 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
  %vdata = call i32 @llvm.amdgcn.struct.ptr.tbuffer.load.i32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 77, i32 0)
  %vdata.f = bitcast i32 %vdata to float
  ret float %vdata.f
}

; Constant voffset 4092: expected to be carried entirely by offset:4092 with
; no offen register.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_voffset_large_12bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_12bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant voffset 8188 (0x1000 + 4092): expected to be split into an offen
; register holding 0x1000 plus offset:4092.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_13bit(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_13bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    s_mov_b32 s4, 0
; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0x1000
; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_13bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s4, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x1000
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_13bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 8188, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant voffset 65532 (0xf000 + 4092): same split, register part 0xf000.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_16bit(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_16bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    s_mov_b32 s4, 0
; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0xf000
; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_16bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s4, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0xf000
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_16bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 65532, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant voffset 8388604 (0x7ff000 + 4092): same split, register part
; 0x7ff000.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_23bit(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_23bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    s_mov_b32 s4, 0
; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0x7ff000
; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_23bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s4, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7ff000
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_23bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 8388604, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant voffset 16777212 (0xfff000 + 4092): same split, register part
; 0xfff000.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_24bit(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_24bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    s_mov_b32 s4, 0
; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0xfff000
; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_24bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s4, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0xfff000
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_24bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 16777212, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Intrinsic declarations, one per result type exercised above.
declare i32 @llvm.amdgcn.struct.ptr.tbuffer.load.i32(ptr addrspace(8), i32, i32, i32, i32, i32)
declare <2 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v2i32(ptr addrspace(8), i32, i32, i32, i32, i32)
declare <4 x i32> @llvm.amdgcn.struct.ptr.tbuffer.load.v4i32(ptr addrspace(8), i32, i32, i32, i32, i32)
declare <4 x float> @llvm.amdgcn.struct.ptr.tbuffer.load.v4f32(ptr addrspace(8), i32, i32, i32, i32, i32)