; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=PREGFX10 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GFX10 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefixes=GFX11 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s
;RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s

; Codegen coverage for the llvm.amdgcn.struct.tbuffer.load.* intrinsics on the
; targets named in the RUN lines above. Judging by the value names used in the
; functions below, the intrinsic operands are
; (rsrc, vindex, voffset, soffset, format, aux).

; Four loads with zero index/offsets: formats 78, 63, 22 and 22, with the last
; operand set to 0, 1, 2 and 5. The expected output shows that operand turning
; into glc / slc / glc+dlc modifiers (th hints on gfx1200).
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>, <4 x float>} @tbuffer_load(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v12, 0
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen glc
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen slc
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[12:15], v12, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] idxen glc
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v16, 0
; GFX10-NEXT:    s_clause 0x3
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v16, s[0:3], 0 format:78 idxen
; GFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], v16, s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen glc
; GFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], v16, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen slc
; GFX10-NEXT:    tbuffer_load_format_xyzw v[12:15], v16, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v12, 0
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v12, s[0:3], 0 format:78 idxen
; GFX11-NEXT:    tbuffer_load_format_xyzw v[4:7], v12, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen glc
; GFX11-NEXT:    tbuffer_load_format_xyzw v[8:11], v12, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen slc
; GFX11-NEXT:    tbuffer_load_format_xyzw v[12:15], v12, s[0:3], 0 format:[BUF_FMT_32_FLOAT] idxen glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v12, 0
; GFX12-NEXT:    s_clause 0x3
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v12, s[0:3], null format:78 idxen
; GFX12-NEXT:    tbuffer_load_format_xyzw v[4:7], v12, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen th:TH_LOAD_NT
; GFX12-NEXT:    tbuffer_load_format_xyzw v[8:11], v12, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen th:TH_LOAD_HT
; GFX12-NEXT:    tbuffer_load_format_xyzw v[12:15], v12, s[0:3], null format:[BUF_FMT_32_FLOAT] idxen th:TH_LOAD_RT_NT
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 78, i32 0)
  %vdata_glc = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 63, i32 1)
  %vdata_slc = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 22, i32 2)
  %vdata_f32 = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 22, i32 5)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
  %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
  ; Every element of the aggregate is overwritten below, so the seed value is
  ; irrelevant; poison is used instead of the deprecated undef.
  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %vdata.f, 0
  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
  %r3 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r2, <4 x float> %vdata_f32, 3
  ret {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r3
}

; A constant voffset of 42 is expected to land in the instruction's immediate
; offset field on every target.
define amdgpu_vs <4 x float> @tbuffer_load_immoffs(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load_immoffs:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:42
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_immoffs:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen offset:42
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_immoffs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen offset:42
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_immoffs:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:78 idxen offset:42
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 42, i32 0, i32 78, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  ret <4 x float> %vdata.f
}

; Constant voffsets (4095, 73, 1) combined with a constant soffset (61) and a
; register soffset (%soffs). On gfx1200 the expected output materializes the
; constant 61 in an SGPR first.
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) {
; PREGFX10-LABEL: tbuffer_load_immoffs_large:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    v_mov_b32_e32 v8, 0
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v8, s[0:3], 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] idxen offset:4095
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], v8, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] idxen offset:73
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], v8, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:1
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_immoffs_large:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v12, 0
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v12, s[0:3], 61 format:[BUF_FMT_10_10_10_2_SSCALED] idxen offset:4095
; GFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], v12, s[0:3], s4 format:[BUF_FMT_32_32_UINT] idxen offset:73
; GFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], v12, s[0:3], s4 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_immoffs_large:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v8, 0
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v8, s[0:3], 61 format:[BUF_FMT_8_8_8_8_SINT] idxen offset:4095
; GFX11-NEXT:    tbuffer_load_format_xyzw v[4:7], v8, s[0:3], s4 format:[BUF_FMT_32_32_32_32_SINT] idxen offset:73
; GFX11-NEXT:    tbuffer_load_format_xyzw v[8:11], v8, s[0:3], s4 format:77 idxen offset:1
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_immoffs_large:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v8, 0
; GFX12-NEXT:    s_mov_b32 s5, 61
; GFX12-NEXT:    s_clause 0x2
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v8, s[0:3], s5 format:[BUF_FMT_8_8_8_8_SINT] idxen offset:4095
; GFX12-NEXT:    tbuffer_load_format_xyzw v[4:7], v8, s[0:3], s4 format:[BUF_FMT_32_32_32_32_SINT] idxen offset:73
; GFX12-NEXT:    tbuffer_load_format_xyzw v[8:11], v8, s[0:3], s4 format:77 idxen offset:1
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 4095, i32 61, i32 47, i32 0)
  %vdata_glc = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 73, i32 %soffs, i32 62, i32 0)
  %vdata_slc = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 1, i32 %soffs, i32 77, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
  %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
  ; All three elements are overwritten below; poison replaces the deprecated
  ; undef as the seed value.
  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %vdata.f, 0
  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
  ret {<4 x float>, <4 x float>, <4 x float>} %r2
}

; Variable vindex with zero offsets: the index VGPR is used directly (idxen).
define amdgpu_vs <4 x float> @tbuffer_load_idx(<4 x i32> inreg, i32 %vindex) {
; PREGFX10-LABEL: tbuffer_load_idx:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_idx:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_idx:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_idx:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:78 idxen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 %vindex, i32 0, i32 0, i32 78, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  ret <4 x float> %vdata.f
}

; Variable voffset with a zero vindex: the expected output builds a
; (zero index, offset) VGPR pair and uses idxen together with offen.
define amdgpu_vs <4 x float> @tbuffer_load_ofs(<4 x i32> inreg, i32 %voffs) {
; PREGFX10-LABEL: tbuffer_load_ofs:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    s_mov_b32 s4, 0
; PREGFX10-NEXT:    v_mov_b32_e32 v1, v0
; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offen
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_ofs:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s4, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, v0
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_ofs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_ofs:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 0
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null format:78 idxen offen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 %voffs, i32 0, i32 78, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  ret <4 x float> %vdata.f
}

; voffset is (%voffs + 52): the expected output keeps %voffs in the VGPR and
; moves the constant 52 into the immediate offset field.
define amdgpu_vs <4 x float> @tbuffer_load_ofs_imm(<4 x i32> inreg, i32 %voffs) {
; PREGFX10-LABEL: tbuffer_load_ofs_imm:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    s_mov_b32 s4, 0
; PREGFX10-NEXT:    v_mov_b32_e32 v1, v0
; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offen offset:52
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_ofs_imm:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s4, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, v0
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen offset:52
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_ofs_imm:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen offset:52
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_ofs_imm:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 0
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null format:78 idxen offen offset:52
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %ofs = add i32 %voffs, 52
  %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 %ofs, i32 0, i32 78, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  ret <4 x float> %vdata.f
}

; Variable vindex and variable voffset: both incoming VGPRs are used as a pair
; with idxen and offen, with no extra moves expected.
define amdgpu_vs <4 x float> @tbuffer_load_both(<4 x i32> inreg, i32 %vindex, i32 %voffs) {
; PREGFX10-LABEL: tbuffer_load_both:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] idxen offen
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_both:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_both:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:78 idxen offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_both:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null format:78 idxen offen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %vdata = call <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32> %0, i32 %vindex, i32 %voffs, i32 0, i32 78, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  ret <4 x float> %vdata.f
}

; Two-component (<2 x i32>) result with format 77: selects the _xy variant of
; the instruction.
define amdgpu_vs <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
; PREGFX10-LABEL: buffer_load_xy:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0
; PREGFX10-NEXT:    tbuffer_load_format_xy v[0:1], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_xy:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    tbuffer_load_format_xy v[0:1], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_xy:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    tbuffer_load_format_xy v[0:1], v0, s[0:3], 0 format:77 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_xy:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    tbuffer_load_format_xy v[0:1], v0, s[0:3], null format:77 idxen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %vdata = call <2 x i32> @llvm.amdgcn.struct.tbuffer.load.v2i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 77, i32 0)
  %vdata.f = bitcast <2 x i32> %vdata to <2 x float>
  ret <2 x float> %vdata.f
}

; Scalar (i32) result with format 77: selects the _x variant of the
; instruction.
define amdgpu_vs float @buffer_load_x(<4 x i32> inreg %rsrc) {
; PREGFX10-LABEL: buffer_load_x:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0
; PREGFX10-NEXT:    tbuffer_load_format_x v0, v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_x:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    tbuffer_load_format_x v0, v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_x:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    tbuffer_load_format_x v0, v0, s[0:3], 0 format:77 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_x:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    tbuffer_load_format_x v0, v0, s[0:3], null format:77 idxen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %vdata = call i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 77, i32 0)
  %vdata.f = bitcast i32 %vdata to float
  ret float %vdata.f
}

; Constant voffset 4092: expected to be encoded entirely in the immediate
; offset field on every target.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_voffset_large_12bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_12bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_voffset_large_12bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:4092
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 4092, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant voffset 8188: the pre-gfx12 expectations split it into a 0x1000
; VGPR offset plus an immediate of 4092, while gfx1200 keeps the whole value
; as an immediate.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_13bit(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_13bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    s_mov_b32 s4, 0
; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0x1000
; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_13bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s4, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x1000
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_13bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_voffset_large_13bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:8188
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 8188, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant voffset 65532: split into 0xf000 + 4092 before gfx12, kept as a
; single immediate on gfx1200.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_16bit(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_16bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    s_mov_b32 s4, 0
; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0xf000
; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_16bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s4, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0xf000
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_16bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_voffset_large_16bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:65532
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 65532, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant voffset 8388604: split into 0x7ff000 + 4092 before gfx12, kept as a
; single immediate on gfx1200.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_23bit(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_23bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    s_mov_b32 s4, 0
; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0x7ff000
; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_23bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s4, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0x7ff000
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_23bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_voffset_large_23bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offset:8388604
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 8388604, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant voffset 16777212: split on every target. Before gfx12 the split is
; 0xfff000 + 4092; on gfx1200 it is 0x800000 + 8388604, and the SelectionDAG
; and GlobalISel runs differ only in the operand order of the dual move.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_24bit(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_24bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    s_mov_b32 s4, 0
; PREGFX10-NEXT:    v_mov_b32_e32 v1, 0xfff000
; PREGFX10-NEXT:    v_mov_b32_e32 v0, s4
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] idxen offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_24bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_mov_b32 s4, 0
; GFX10-NEXT:    v_mov_b32_e32 v1, 0xfff000
; GFX10-NEXT:    v_mov_b32_e32 v0, s4
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_SINT] idxen offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_24bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-SDAG-LABEL: tbuffer_load_voffset_large_24bit:
; GFX12-SDAG:       ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 0x800000 :: v_dual_mov_b32 v0, 0
; GFX12-SDAG-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:8388604
; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-LABEL: tbuffer_load_voffset_large_24bit:
; GFX12-GISEL:       ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x800000
; GFX12-GISEL-NEXT:    tbuffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] idxen offen offset:8388604
; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 16777212, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Declarations of the intrinsic overloads called above.
declare i32 @llvm.amdgcn.struct.tbuffer.load.i32(<4 x i32>, i32, i32, i32, i32, i32)
declare <2 x i32> @llvm.amdgcn.struct.tbuffer.load.v2i32(<4 x i32>, i32, i32, i32, i32, i32)
declare <4 x i32> @llvm.amdgcn.struct.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32, i32)
declare <4 x float> @llvm.amdgcn.struct.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32, i32)