; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefixes=GFX6 %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=GFX8PLUS %s
;RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck --check-prefixes=GFX11 %s

; Codegen checks for the llvm.amdgcn.struct.ptr.buffer.load.format.* intrinsics
; on three subtargets (verde, tonga, gfx1100). Every call below has the shape
;   (ptr addrspace(8) resource, i32, i32, i32, i32)
; and the per-function comments describe how each operand is expected to show
; up in the selected buffer_load_format_* instruction, as recorded by the
; autogenerated assertions. Do not hand-edit the assertion lines; regenerate
; them with utils/update_llc_test_checks.py.

; Three loads with all-zero second/third/fourth operands whose last operand is
; 0, 1 and 2. The assertions expect no modifier, `glc` and `slc` respectively.
define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v8, 0
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
; GFX6-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
; GFX6-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v8, 0
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v8, 0
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
; GFX11-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
; GFX11-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 0)
  %data_glc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 1)
  %data_slc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 2)
  ; All three fields are overwritten below, so a poison base is sufficient.
  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %data, 0
  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
  ret {<4 x float>, <4 x float>, <4 x float>} %r2
}

; A constant 42 in the third operand is expected to be folded into the
; instruction's immediate field (offset:42).
define amdgpu_ps <4 x float> @buffer_load_immoffs(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load_immoffs:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_immoffs:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_immoffs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 42, i32 0, i32 0)
  ret <4 x float> %data
}

; Constant third operand combined with a non-zero constant fourth operand
; (60, 32764 = 0x7ffc, 36860 = 0x8ffc). The assertions expect 60 to be used
; directly as an operand and the two larger values to be materialized in s4,
; while the third operand stays in the immediate offset field.
define amdgpu_ps <4 x float> @buffer_load_immoffs_large(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load_immoffs_large:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v8, 0
; GFX6-NEXT:    s_movk_i32 s4, 0x7ffc
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
; GFX6-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
; GFX6-NEXT:    s_mov_b32 s4, 0x8ffc
; GFX6-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
; GFX6-NEXT:    s_waitcnt vmcnt(1)
; GFX6-NEXT:    v_add_f32_e32 v3, v3, v7
; GFX6-NEXT:    v_add_f32_e32 v2, v2, v6
; GFX6-NEXT:    v_add_f32_e32 v1, v1, v5
; GFX6-NEXT:    v_add_f32_e32 v0, v0, v4
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_add_f32_e32 v0, v8, v0
; GFX6-NEXT:    v_add_f32_e32 v1, v9, v1
; GFX6-NEXT:    v_add_f32_e32 v2, v10, v2
; GFX6-NEXT:    v_add_f32_e32 v3, v11, v3
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_immoffs_large:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v8, 0
; GFX8PLUS-NEXT:    s_movk_i32 s4, 0x7ffc
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0x8ffc
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(1)
; GFX8PLUS-NEXT:    v_add_f32_e32 v3, v3, v7
; GFX8PLUS-NEXT:    v_add_f32_e32 v2, v2, v6
; GFX8PLUS-NEXT:    v_add_f32_e32 v1, v1, v5
; GFX8PLUS-NEXT:    v_add_f32_e32 v0, v0, v4
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    v_add_f32_e32 v0, v8, v0
; GFX8PLUS-NEXT:    v_add_f32_e32 v1, v9, v1
; GFX8PLUS-NEXT:    v_add_f32_e32 v2, v10, v2
; GFX8PLUS-NEXT:    v_add_f32_e32 v3, v11, v3
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_immoffs_large:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v8, 0
; GFX11-NEXT:    s_movk_i32 s4, 0x7ffc
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
; GFX11-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
; GFX11-NEXT:    s_mov_b32 s4, 0x8ffc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_add_f32_e32 v1, v1, v5
; GFX11-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
; GFX11-NEXT:    v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v3, v3, v7
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v1, v9, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_dual_add_f32 v0, v8, v0 :: v_dual_add_f32 v3, v11, v3
; GFX11-NEXT:    v_add_f32_e32 v2, v10, v2
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %d.0 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 60, i32 0)
  %d.1 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 32764, i32 0)
  %d.2 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4, i32 36860, i32 0)
  %d.3 = fadd <4 x float> %d.0, %d.1
  %data = fadd <4 x float> %d.2, %d.3
  ret <4 x float> %data
}

; Third operand 4092: expected to be encoded entirely as offset:4092, with no
; `offen` register.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load_voffset_large_12bit:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_voffset_large_12bit:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_12bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 0, i32 0)
  ret <4 x float> %data
}

; Third operand 8188 (= 0x1000 + 4092): expected to be split into a register
; part (v1 = 0x1000, `offen`) plus offset:4092.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load_voffset_large_13bit:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, 0x1000
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_voffset_large_13bit:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0x1000
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_13bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 8188, i32 0, i32 0)
  ret <4 x float> %data
}

; Third operand 65532 (= 0xf000 + 4092): same split as the 13-bit case, with
; v1 = 0xf000.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load_voffset_large_16bit:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, 0xf000
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_voffset_large_16bit:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0xf000
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_16bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 65532, i32 0, i32 0)
  ret <4 x float> %data
}

; Third operand 8388604 (= 0x7ff000 + 4092): same split, with v1 = 0x7ff000.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load_voffset_large_23bit:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, 0x7ff000
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_voffset_large_23bit:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0x7ff000
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_23bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 8388604, i32 0, i32 0)
  ret <4 x float> %data
}

; Third operand 16777212 (= 0xfff000 + 4092): same split, with v1 = 0xfff000.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load_voffset_large_24bit:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, 0xfff000
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_voffset_large_24bit:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0xfff000
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_24bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 16777212, i32 0, i32 0)
  ret <4 x float> %data
}

; Non-constant second operand (VGPR argument): expected to be used directly as
; the `idxen` address register with no extra moves.
define amdgpu_ps <4 x float> @buffer_load_idx(ptr addrspace(8) inreg, i32) {
; GFX6-LABEL: buffer_load_idx:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_idx:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_idx:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %1, i32 0, i32 0, i32 0)
  ret <4 x float> %data
}

; Non-constant third operand with a zero second operand: the assertions expect
; a v[0:1] register pair (zero in v0, the argument in v1) with `idxen offen`.
define amdgpu_ps <4 x float> @buffer_load_ofs(ptr addrspace(8) inreg, i32) {
; GFX6-LABEL: buffer_load_ofs:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_ofs:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_ofs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 %1, i32 0, i32 0)
  ret <4 x float> %data
}

; Third operand is (argument + 60): the constant addend is expected to be
; folded into offset:60 instead of emitting a separate add.
define amdgpu_ps <4 x float> @buffer_load_ofs_imm(ptr addrspace(8) inreg, i32) {
; GFX6-LABEL: buffer_load_ofs_imm:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_ofs_imm:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_ofs_imm:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %ofs = add i32 %1, 60
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 %ofs, i32 0, i32 0)
  ret <4 x float> %data
}

; Both the second and third operands are VGPR arguments in argument order: the
; incoming v[0:1] pair is expected to be used as-is.
define amdgpu_ps <4 x float> @buffer_load_both(ptr addrspace(8) inreg, i32, i32) {
; GFX6-LABEL: buffer_load_both:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_both:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_both:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %1, i32 %2, i32 0, i32 0)
  ret <4 x float> %data
}

; Same as buffer_load_both but with the two VGPR arguments swapped: a single
; v_mov is expected so that the address pair becomes v[1:2].
define amdgpu_ps <4 x float> @buffer_load_both_reversed(ptr addrspace(8) inreg, i32, i32) {
; GFX6-LABEL: buffer_load_both_reversed:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_both_reversed:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, v0
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_both_reversed:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v2, v0
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %2, i32 %1, i32 0, i32 0)
  ret <4 x float> %data
}

; Scalar float result: expected to select buffer_load_format_x.
define amdgpu_ps float @buffer_load_x(ptr addrspace(8) inreg %rsrc) {
; GFX6-LABEL: buffer_load_x:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_x:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_x:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  ret float %data
}

; Scalar i32 result (bitcast to float for the return): expected to select the
; same buffer_load_format_x as the float variant.
define amdgpu_ps float @buffer_load_x_i32(ptr addrspace(8) inreg %rsrc) {
; GFX6-LABEL: buffer_load_x_i32:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_x_i32:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_x_i32:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  %fdata = bitcast i32 %data to float
  ret float %fdata
}

; <2 x float> result: expected to select buffer_load_format_xy.
define amdgpu_ps <2 x float> @buffer_load_xy(ptr addrspace(8) inreg %rsrc) {
; GFX6-LABEL: buffer_load_xy:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_xy:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_xy:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  ret <2 x float> %data
}

; The *_tfe functions below use the struct-returning form { data, i32 }. The
; assertions expect the `tfe` modifier and one extra destination register; the
; data part is stored to %out and the trailing i32 is returned as a float.

; { <4 x i32>, i32 } result: format_xyzw with a five-register destination.
define amdgpu_cs float @buffer_load_v4i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v4i32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v6
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v4i32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v6
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v4i32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT:    v_mov_b32_e32 v0, v6
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
  %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { <4 x i32>, i32 } %load, 0
  store <4 x i32> %data, ptr addrspace(1) %out
  %status = extractvalue { <4 x i32>, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}

; { <4 x float>, i32 } result: same expected code as the v4i32 variant.
define amdgpu_cs float @buffer_load_v4f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v4f32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v6
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v4f32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v6
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v4f32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT:    v_mov_b32_e32 v0, v6
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
  %load = call { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { <4 x float>, i32 } %load, 0
  store <4 x float> %data, ptr addrspace(1) %out
  %status = extractvalue { <4 x float>, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}

; { <3 x i32>, i32 } result: format_xyz with a four-register destination. The
; verde run stores the data as dword + dwordx2, the other two runs use a
; single three-dword store.
define amdgpu_cs float @buffer_load_v3i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v3i32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8
; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v5
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v3i32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx3 v[0:1], v[2:4]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v5
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v3i32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX11-NEXT:    v_mov_b32_e32 v0, v5
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
  %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { <3 x i32>, i32 } %load, 0
  store <3 x i32> %data, ptr addrspace(1) %out
  %status = extractvalue { <3 x i32>, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}

; { <3 x float>, i32 } result: same expected code as the v3i32 variant.
define amdgpu_cs float @buffer_load_v3f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v3f32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8
; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v5
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v3f32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx3 v[0:1], v[2:4]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v5
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v3f32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b96 v[0:1], v[2:4], off
; GFX11-NEXT:    v_mov_b32_e32 v0, v5
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
  %load = call { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { <3 x float>, i32 } %load, 0
  store <3 x float> %data, ptr addrspace(1) %out
  %status = extractvalue { <3 x float>, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}

; { <2 x i32>, i32 } result.
; NOTE(review): the verde (GFX6) assertions record format_xyz with a v[2:5]
; destination while still reading the status from v4, whereas the other two
; runs record format_xy with v[2:4]. This is what the generator captured;
; confirm the GFX6 selection is intended before relying on it.
define amdgpu_cs float @buffer_load_v2i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v2i32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v4
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v2i32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v4
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v2i32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX11-NEXT:    v_mov_b32_e32 v0, v4
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
  %load = call { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { <2 x i32>, i32 } %load, 0
  store <2 x i32> %data, ptr addrspace(1) %out
  %status = extractvalue { <2 x i32>, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}

; { <2 x float>, i32 } result: same expected code as the v2i32 variant.
; NOTE(review): as with the v2i32 variant, the verde (GFX6) assertions record
; format_xyz v[2:5] but read the status from v4; confirm this is intended.
define amdgpu_cs float @buffer_load_v2f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v2f32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v4
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v2f32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v4
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v2f32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b64 v[0:1], v[2:3], off
; GFX11-NEXT:    v_mov_b32_e32 v0, v4
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
  %load = call { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { <2 x float>, i32 } %load, 0
  store <2 x float> %data, ptr addrspace(1) %out
  %status = extractvalue { <2 x float>, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}

; { i32, i32 } result: format_x with a two-register destination.
define amdgpu_cs float @buffer_load_i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_i32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v3
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_i32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dword v[0:1], v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v3
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_i32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    v_mov_b32_e32 v0, v3
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
  %load = call { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { i32, i32 } %load, 0
  store i32 %data, ptr addrspace(1) %out
  %status = extractvalue { i32, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}

; { float, i32 } result: same expected code as the i32 variant.
define amdgpu_cs float @buffer_load_f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_f32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v2, v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v3
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_f32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dword v[0:1], v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v3
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_f32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v[0:1], v2, off
; GFX11-NEXT:    v_mov_b32_e32 v0, v3
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    ; return to shader part epilog
  %load = call { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { float, i32 } %load, 0
  store float %data, ptr addrspace(1) %out
  %status = extractvalue { float, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}

declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) #0
declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32) #0
declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #0
declare i32 
@llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8), i32, i32, i32, i32) #0 850declare { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 851declare { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 852declare { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 853declare { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 854declare { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 855declare { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 856declare { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 857declare { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 858attributes #0 = { nounwind readonly } 859