; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=PREGFX10 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefix=PREGFX10 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefix=GFX10 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefix=GFX11 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefix=GFX12 %s
;RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefix=GFX12 %s

; Code generation checks for the llvm.amdgcn.raw.tbuffer.load.* intrinsics.
; The same IR is compiled for verde and tonga (which share one check prefix),
; gfx1010, gfx1100 and gfx1200; gfx1200 is run both with and without
; -global-isel and both runs must produce the same output.
;
; As the expectations below show, the intrinsic operands are, in order: the
; buffer resource, the byte offset, the scalar offset, the format value and a
; final cache-policy value. The assertion comments are generated from the IR,
; so after any change to a function body they have to be regenerated with
; utils/update_llc_test_checks.py rather than edited by hand.

; Four loads from the same resource with offset 0 and scalar offset 0 that
; differ in format (78, 63, 22, 22) and in the last operand (0, 1, 2, 5).
; The expected output shows how that last operand is printed per target:
; 1 -> glc, 2 -> slc, 5 -> glc (plus dlc on gfx1010/gfx1100), and th:TH_LOAD_*
; modifiers on gfx1200. It also shows how each target prints the format value.
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>, <4 x float>} @tbuffer_load(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT]
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] glc
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] slc
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[12:15], off, s[0:3], 0 format:[BUF_DATA_FORMAT_10_11_11,BUF_NUM_FORMAT_SNORM] glc
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x3
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78
; GFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 format:[BUF_FMT_32_32_SINT] glc
; GFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] slc
; GFX10-NEXT:    tbuffer_load_format_xyzw v[12:15], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] glc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_clause 0x3
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78
; GFX11-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] glc
; GFX11-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] slc
; GFX11-NEXT:    tbuffer_load_format_xyzw v[12:15], off, s[0:3], 0 format:[BUF_FMT_32_FLOAT] glc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_clause 0x3
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:78
; GFX12-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] th:TH_LOAD_NT
; GFX12-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], null format:[BUF_FMT_32_FLOAT] th:TH_LOAD_HT
; GFX12-NEXT:    tbuffer_load_format_xyzw v[12:15], off, s[0:3], null format:[BUF_FMT_32_FLOAT] th:TH_LOAD_RT_NT
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %vdata = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 78, i32 0)
  %vdata_glc = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 63, i32 1)
  %vdata_slc = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 0, i32 0, i32 22, i32 2)
  %vdata_f32 = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 0, i32 0, i32 22, i32 5)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
  %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %vdata.f, 0
  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
  %r3 = insertvalue {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r2, <4 x float> %vdata_f32, 3
  ret {<4 x float>, <4 x float>, <4 x float>, <4 x float>} %r3
}

; A small constant byte offset (42) is expected to be emitted directly in the
; instruction's immediate offset field on every target.
define amdgpu_vs <4 x float> @tbuffer_load_immoffs(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load_immoffs:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] offset:42
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_immoffs:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78 offset:42
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_immoffs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:78 offset:42
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_immoffs:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:78 offset:42
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %vdata = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 42, i32 0, i32 78, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  ret <4 x float> %vdata.f
}

; Constant byte offset 4092: still expected as a plain immediate offset on
; every target, with no offset register needed.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_voffset_large_12bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_12bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_voffset_large_12bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] offset:4092
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 4092, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant byte offset 8188 (0x1000 + 4092). Targets before gfx1200 are
; expected to split it into a v_mov_b32 of 0x1000 used with offen plus an
; immediate offset of 4092; gfx1200 is expected to keep the whole value in the
; immediate offset field.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_13bit(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_13bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0x1000
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_13bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x1000
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_13bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0x1000
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_voffset_large_13bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] offset:8188
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 8188, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant byte offset 65532 (0xf000 + 4092). Same expectation pattern as the
; 13-bit case: split into register + immediate before gfx1200, single
; immediate on gfx1200.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_16bit(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_16bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0xf000
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_16bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xf000
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_16bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0xf000
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_voffset_large_16bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] offset:65532
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 65532, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant byte offset 8388604 (0x7ff000 + 4092). This is the largest value in
; this file that the gfx1200 expectations still encode purely as an immediate
; offset; older targets split it as in the previous cases.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_23bit(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_23bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0x7ff000
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_23bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x7ff000
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_23bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0x7ff000
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_voffset_large_23bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] offset:8388604
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32> %0, i32 8388604, i32 0, i32 63, i32 0)
  ret <4 x float> %data
}

; Constant byte offset 16777212. Here every target, including gfx1200, is
; expected to split the value: 0xfff000 + 4092 before gfx1200, and
; 0x800000 + 8388604 on gfx1200.
define amdgpu_ps <4 x float> @tbuffer_load_voffset_large_24bit(<4 x i32> inreg) {
; PREGFX10-LABEL: tbuffer_load_voffset_large_24bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0xfff000
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_SSCALED] offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_voffset_large_24bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xfff000
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_SINT] offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_voffset_large_24bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0xfff000
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_voffset_large_24bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0x800000
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:[BUF_FMT_32_32_32_32_FLOAT] offen offset:8388604
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4i32.placeholder.removed
  ret <4 x float> %data
}

; Three loads exercising the scalar-offset operand: a constant 61 combined
; with immediate offset 4095, and the inreg argument %soffs (s4 in the
; expected output) combined with offsets 73 and 1. Targets before gfx1200 are
; expected to use 61 directly as the soffset operand, while gfx1200 is
; expected to materialize it in s5 first. All three calls pass 0 as the last
; operand, so the _glc/_slc suffixes on the value names do not reflect any
; cache bits here. This function has no named entry block, hence the bare
; "%bb.0:" in its expectations.
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) {
; PREGFX10-LABEL: tbuffer_load_immoffs_large:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] offset:73
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], s4 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] offset:1
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_immoffs_large:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 61 format:[BUF_FMT_10_10_10_2_SSCALED] offset:4095
; GFX10-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], s4 format:[BUF_FMT_32_32_UINT] offset:73
; GFX10-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], s4 format:[BUF_FMT_32_32_32_32_FLOAT] offset:1
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_immoffs_large:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], 61 format:[BUF_FMT_8_8_8_8_SINT] offset:4095
; GFX11-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], s4 format:[BUF_FMT_32_32_32_32_SINT] offset:73
; GFX11-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], s4 format:77 offset:1
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_immoffs_large:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 s5, 61
; GFX12-NEXT:    s_clause 0x2
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], off, s[0:3], s5 format:[BUF_FMT_8_8_8_8_SINT] offset:4095
; GFX12-NEXT:    tbuffer_load_format_xyzw v[4:7], off, s[0:3], s4 format:[BUF_FMT_32_32_32_32_SINT] offset:73
; GFX12-NEXT:    tbuffer_load_format_xyzw v[8:11], off, s[0:3], s4 format:77 offset:1
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %vdata = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 4095, i32 61, i32 47, i32 0)
  %vdata_glc = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 73, i32 %soffs, i32 62, i32 0)
  %vdata_slc = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 1, i32 %soffs, i32 77, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  %vdata_glc.f = bitcast <4 x i32> %vdata_glc to <4 x float>
  %vdata_slc.f = bitcast <4 x i32> %vdata_slc to <4 x float>
  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %vdata.f, 0
  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %vdata_glc.f, 1
  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %vdata_slc.f, 2
  ret {<4 x float>, <4 x float>, <4 x float>} %r2
}

; A non-constant byte offset (%voffs, a non-inreg argument that the expected
; output places in v0) is expected to be passed in a VGPR with the offen
; modifier and no immediate offset.
define amdgpu_vs <4 x float> @tbuffer_load_ofs(<4 x i32> inreg, i32 %voffs) {
; PREGFX10-LABEL: tbuffer_load_ofs:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] offen
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_ofs:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_ofs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_ofs:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:78 offen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %vdata = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 %voffs, i32 0, i32 78, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  ret <4 x float> %vdata.f
}

; A non-constant offset plus a constant (%voffs + 52): the expected output has
; no separate add instruction; the variable part goes in v0 with offen and the
; constant 52 is folded into the immediate offset field.
define amdgpu_vs <4 x float> @tbuffer_load_ofs_imm(<4 x i32> inreg, i32 %voffs) {
; PREGFX10-LABEL: tbuffer_load_ofs_imm:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_UINT] offen offset:52
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: tbuffer_load_ofs_imm:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen offset:52
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: tbuffer_load_ofs_imm:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], 0 format:78 offen offset:52
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: tbuffer_load_ofs_imm:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    tbuffer_load_format_xyzw v[0:3], v0, s[0:3], null format:78 offen offset:52
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %ofs = add i32 %voffs, 52
  %vdata = call <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32> %0, i32 %ofs, i32 0, i32 78, i32 0)
  %vdata.f = bitcast <4 x i32> %vdata to <4 x float>
  ret <4 x float> %vdata.f
}

; Two-component result type (<2 x i32>): expected to select the
; tbuffer_load_format_xy form with a two-register destination.
define amdgpu_vs <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
; PREGFX10-LABEL: buffer_load_xy:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    tbuffer_load_format_xy v[0:1], off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT]
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_xy:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    tbuffer_load_format_xy v[0:1], off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT]
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_xy:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    tbuffer_load_format_xy v[0:1], off, s[0:3], 0 format:77
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_xy:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    tbuffer_load_format_xy v[0:1], off, s[0:3], null format:77
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %vdata = call <2 x i32> @llvm.amdgcn.raw.tbuffer.load.v2i32(<4 x i32> %rsrc, i32 0, i32 0, i32 77, i32 0)
  %vdata.f = bitcast <2 x i32> %vdata to <2 x float>
  ret <2 x float> %vdata.f
}

; Scalar result type (i32): expected to select the tbuffer_load_format_x form
; with a single destination register.
define amdgpu_vs float @buffer_load_x(<4 x i32> inreg %rsrc) {
; PREGFX10-LABEL: buffer_load_x:
; PREGFX10:       ; %bb.0:
; PREGFX10-NEXT:    tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT]
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_x:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_32_32_32_FLOAT]
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_x:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    tbuffer_load_format_x v0, off, s[0:3], 0 format:77
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_x:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    tbuffer_load_format_x v0, off, s[0:3], null format:77
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %vdata = call i32 @llvm.amdgcn.raw.tbuffer.load.i32(<4 x i32> %rsrc, i32 0, i32 0, i32 77, i32 0)
  %vdata.f = bitcast i32 %vdata to float
  ret float %vdata.f
}

; Declarations of the intrinsic overloads used above, one per result type.
declare i32 @llvm.amdgcn.raw.tbuffer.load.i32(<4 x i32>, i32, i32, i32, i32)
declare <2 x i32> @llvm.amdgcn.raw.tbuffer.load.v2i32(<4 x i32>, i32, i32, i32, i32)
declare <4 x i32> @llvm.amdgcn.raw.tbuffer.load.v4i32(<4 x i32>, i32, i32, i32, i32)
declare <4 x float> @llvm.amdgcn.raw.tbuffer.load.v4f32(<4 x i32>, i32, i32, i32, i32)