; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefixes=GFX6 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=GFX8PLUS %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck --check-prefixes=GFX11 %s
;RUN: llc < %s -mtriple=amdgcn -mattr=-enable-prt-strict-null -mcpu=gfx1100 -verify-machineinstrs | FileCheck --check-prefixes=NOPRT %s

; Codegen checks for the llvm.amdgcn.struct.ptr.buffer.load.format.* intrinsics.
; The invocations above compile for verde, tonga and gfx1100; the last one
; compiles for gfx1100 with the enable-prt-strict-null feature switched off.
; In the functions below the intrinsic operands after the resource pointer are
; passed in the order: index, offset, scalar offset, auxiliary bits (the
; expected assembly shows how each one is encoded).

; Three loads from the same resource with auxiliary values 0, 1 and 2; the
; expected assembly shows no modifier, "glc" and "slc" respectively.
define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v8, 0
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
; GFX6-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
; GFX6-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v8, 0
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v8, 0
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
; GFX11-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
; GFX11-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v8, 0
; NOPRT-NEXT:    s_clause 0x2
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
; NOPRT-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
; NOPRT-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 0)
  %data_glc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 1)
  %data_slc = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0, i32 2)
  ; Every field of the aggregate is overwritten below, so the seed value is
  ; never observed; poison is used instead of the deprecated undef.
  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %data, 0
  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
  ret {<4 x float>, <4 x float>, <4 x float>} %r2
}

; Constant offset 42 is expected to appear as the instruction's "offset:42".
define amdgpu_ps <4 x float> @buffer_load_immoffs(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load_immoffs:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_immoffs:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_immoffs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_immoffs:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 42, i32 0, i32 0)
  ret <4 x float> %data
}

; Constant offsets combined with constant scalar offsets 60, 32764 (0x7ffc) and
; 36860 (0x8ffc). The expected assembly uses 60 directly as an operand and
; materializes the two larger values in s4. The three results are summed.
define amdgpu_ps <4 x float> @buffer_load_immoffs_large(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load_immoffs_large:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v8, 0
; GFX6-NEXT:    s_movk_i32 s4, 0x7ffc
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
; GFX6-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
; GFX6-NEXT:    s_mov_b32 s4, 0x8ffc
; GFX6-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
; GFX6-NEXT:    s_waitcnt vmcnt(1)
; GFX6-NEXT:    v_add_f32_e32 v3, v3, v7
; GFX6-NEXT:    v_add_f32_e32 v2, v2, v6
; GFX6-NEXT:    v_add_f32_e32 v1, v1, v5
; GFX6-NEXT:    v_add_f32_e32 v0, v0, v4
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_add_f32_e32 v0, v8, v0
; GFX6-NEXT:    v_add_f32_e32 v1, v9, v1
; GFX6-NEXT:    v_add_f32_e32 v2, v10, v2
; GFX6-NEXT:    v_add_f32_e32 v3, v11, v3
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_immoffs_large:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v8, 0
; GFX8PLUS-NEXT:    s_movk_i32 s4, 0x7ffc
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0x8ffc
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(1)
; GFX8PLUS-NEXT:    v_add_f32_e32 v3, v3, v7
; GFX8PLUS-NEXT:    v_add_f32_e32 v2, v2, v6
; GFX8PLUS-NEXT:    v_add_f32_e32 v1, v1, v5
; GFX8PLUS-NEXT:    v_add_f32_e32 v0, v0, v4
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    v_add_f32_e32 v0, v8, v0
; GFX8PLUS-NEXT:    v_add_f32_e32 v1, v9, v1
; GFX8PLUS-NEXT:    v_add_f32_e32 v2, v10, v2
; GFX8PLUS-NEXT:    v_add_f32_e32 v3, v11, v3
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_immoffs_large:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v8, 0
; GFX11-NEXT:    s_movk_i32 s4, 0x7ffc
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
; GFX11-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
; GFX11-NEXT:    s_mov_b32 s4, 0x8ffc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_add_f32_e32 v1, v1, v5
; GFX11-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
; GFX11-NEXT:    v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v3, v3, v7
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v1, v9, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_dual_add_f32 v0, v8, v0 :: v_dual_add_f32 v3, v11, v3
; GFX11-NEXT:    v_add_f32_e32 v2, v10, v2
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_immoffs_large:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v8, 0
; NOPRT-NEXT:    s_movk_i32 s4, 0x7ffc
; NOPRT-NEXT:    s_clause 0x1
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
; NOPRT-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
; NOPRT-NEXT:    s_mov_b32 s4, 0x8ffc
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    v_add_f32_e32 v1, v1, v5
; NOPRT-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
; NOPRT-NEXT:    v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v3, v3, v7
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v1, v9, v1
; NOPRT-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; NOPRT-NEXT:    v_dual_add_f32 v0, v8, v0 :: v_dual_add_f32 v3, v11, v3
; NOPRT-NEXT:    v_add_f32_e32 v2, v10, v2
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %d.0 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 60, i32 0)
  %d.1 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 32764, i32 0)
  %d.2 = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4, i32 36860, i32 0)
  %d.3 = fadd <4 x float> %d.0, %d.1
  %data = fadd <4 x float> %d.2, %d.3
  ret <4 x float> %data
}

; Constant offset 4092: expected to be encoded entirely as "offset:4092".
define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load_voffset_large_12bit:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_voffset_large_12bit:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_12bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_voffset_large_12bit:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 4092, i32 0, i32 0)
  ret <4 x float> %data
}

; Constant offset 8188: expected to be split into 0x1000 in a VGPR plus
; "offset:4092".
define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load_voffset_large_13bit:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, 0x1000
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_voffset_large_13bit:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0x1000
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_13bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_voffset_large_13bit:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    s_mov_b32 s4, 0
; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; NOPRT-NEXT:    v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 8188, i32 0, i32 0)
  ret <4 x float> %data
}

; Constant offset 65532: expected to be split into 0xf000 in a VGPR plus
; "offset:4092".
define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load_voffset_large_16bit:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, 0xf000
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_voffset_large_16bit:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0xf000
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_16bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_voffset_large_16bit:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    s_mov_b32 s4, 0
; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; NOPRT-NEXT:    v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 65532, i32 0, i32 0)
  ret <4 x float> %data
}

; Constant offset 8388604: expected to be split into 0x7ff000 in a VGPR plus
; "offset:4092".
define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load_voffset_large_23bit:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, 0x7ff000
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_voffset_large_23bit:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0x7ff000
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_23bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_voffset_large_23bit:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    s_mov_b32 s4, 0
; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; NOPRT-NEXT:    v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 8388604, i32 0, i32 0)
  ret <4 x float> %data
}

; Constant offset 16777212: expected to be split into 0xfff000 in a VGPR plus
; "offset:4092".
define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(ptr addrspace(8) inreg) {
; GFX6-LABEL: buffer_load_voffset_large_24bit:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, 0xfff000
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_voffset_large_24bit:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0xfff000
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_24bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_voffset_large_24bit:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    s_mov_b32 s4, 0
; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; NOPRT-NEXT:    v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 16777212, i32 0, i32 0)
  ret <4 x float> %data
}

; Variable index (second function argument) with zero offset: "idxen" only.
define amdgpu_ps <4 x float> @buffer_load_idx(ptr addrspace(8) inreg, i32) {
; GFX6-LABEL: buffer_load_idx:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_idx:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_idx:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_idx:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %1, i32 0, i32 0, i32 0)
  ret <4 x float> %data
}

; Zero index with a variable offset: the expected code builds a two-register
; address (zero index in v0, offset in v1) and uses "idxen offen".
define amdgpu_ps <4 x float> @buffer_load_ofs(ptr addrspace(8) inreg, i32) {
; GFX6-LABEL: buffer_load_ofs:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_ofs:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_ofs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_ofs:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    s_mov_b32 s4, 0
; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; NOPRT-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 %1, i32 0, i32 0)
  ret <4 x float> %data
}

; Variable offset plus constant 60: the add is expected to fold into
; "offset:60" instead of producing a separate add instruction.
define amdgpu_ps <4 x float> @buffer_load_ofs_imm(ptr addrspace(8) inreg, i32) {
; GFX6-LABEL: buffer_load_ofs_imm:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_ofs_imm:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_ofs_imm:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_ofs_imm:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    s_mov_b32 s4, 0
; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; NOPRT-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %ofs = add i32 %1, 60
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 0, i32 %ofs, i32 0, i32 0)
  ret <4 x float> %data
}

; Variable index and variable offset arriving in v0 and v1: used directly as
; the v[0:1] address pair, no copies expected.
define amdgpu_ps <4 x float> @buffer_load_both(ptr addrspace(8) inreg, i32, i32) {
; GFX6-LABEL: buffer_load_both:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_both:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_both:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_both:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %1, i32 %2, i32 0, i32 0)
  ret <4 x float> %data
}

; Same as buffer_load_both but with the two arguments swapped at the call, so
; one copy (v0 into v2) is expected to form the v[1:2] address pair.
define amdgpu_ps <4 x float> @buffer_load_both_reversed(ptr addrspace(8) inreg, i32, i32) {
; GFX6-LABEL: buffer_load_both_reversed:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_both_reversed:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, v0
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_both_reversed:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v2, v0
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_both_reversed:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v2, v0
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8) %0, i32 %2, i32 %1, i32 0, i32 0)
  ret <4 x float> %data
}

; Single-float result: expected to select buffer_load_format_x.
define amdgpu_ps float @buffer_load_x(ptr addrspace(8) inreg %rsrc) {
; GFX6-LABEL: buffer_load_x:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_x:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_x:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_x:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
; NOPRT-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %data = call float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  ret float %data
}

; i32-typed variant of buffer_load_x; the result is bitcast to float for the
; return and the expected assembly is the same as for the float variant.
define amdgpu_ps float @buffer_load_x_i32(ptr addrspace(8) inreg %rsrc) {
; GFX6-LABEL: buffer_load_x_i32:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_x_i32:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_x_i32:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_x_i32:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
; NOPRT-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %data = call i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  %fdata = bitcast i32 %data to float
  ret float %fdata
}

; Two-float result: expected to select buffer_load_format_xy.
define amdgpu_ps <2 x float> @buffer_load_xy(ptr addrspace(8) inreg %rsrc) {
; GFX6-LABEL: buffer_load_xy:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_xy:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_xy:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_xy:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
; NOPRT-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
main_body:
  %data = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  ret <2 x float> %data
}

; Struct-returning variant { <4 x i32>, i32 }: selected with the "tfe" modifier
; and a five-register destination. The data is stored to %out and the extra
; dword is returned. In the first three configurations all five destination
; registers are zeroed before the load; in the configuration with
; enable-prt-strict-null switched off only v6 is.
define amdgpu_cs float @buffer_load_v4i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v4i32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_mov_b32_e32 v3, v2
; GFX6-NEXT:    v_mov_b32_e32 v4, v2
; GFX6-NEXT:    v_mov_b32_e32 v5, v2
; GFX6-NEXT:    v_mov_b32_e32 v6, v2
; GFX6-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v6
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v4i32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v5, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v6, v2
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v6
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v4i32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v3, v2
; GFX11-NEXT:    v_mov_b32_e32 v4, v2
; GFX11-NEXT:    v_mov_b32_e32 v5, v2
; GFX11-NEXT:    v_mov_b32_e32 v6, v2
; GFX11-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT:    v_mov_b32_e32 v0, v6
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_v4i32_tfe:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    v_mov_b32_e32 v6, 0
; NOPRT-NEXT:    buffer_load_format_xyzw v[2:6], v6, s[0:3], 0 idxen tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    global_store_b128 v[0:1], v[2:5], off
; NOPRT-NEXT:    v_mov_b32_e32 v0, v6
; NOPRT-NEXT:    ; return to shader part epilog
  %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { <4 x i32>, i32 } %load, 0
  store <4 x i32> %data, ptr addrspace(1) %out
  %status = extractvalue { <4 x i32>, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}

; Same as buffer_load_v4i32_tfe with a <4 x float> data member; the expected
; assembly is identical.
define amdgpu_cs float @buffer_load_v4f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v4f32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_mov_b32_e32 v3, v2
; GFX6-NEXT:    v_mov_b32_e32 v4, v2
; GFX6-NEXT:    v_mov_b32_e32 v5, v2
; GFX6-NEXT:    v_mov_b32_e32 v6, v2
; GFX6-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v6
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v4f32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v5, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v6, v2
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v6
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v4f32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v3, v2
; GFX11-NEXT:    v_mov_b32_e32 v4, v2
; GFX11-NEXT:    v_mov_b32_e32 v5, v2
; GFX11-NEXT:    v_mov_b32_e32 v6, v2
; GFX11-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT:    v_mov_b32_e32 v0, v6
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_v4f32_tfe:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    v_mov_b32_e32 v6, 0
; NOPRT-NEXT:    buffer_load_format_xyzw v[2:6], v6, s[0:3], 0 idxen tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    global_store_b128 v[0:1], v[2:5], off
; NOPRT-NEXT:    v_mov_b32_e32 v0, v6
; NOPRT-NEXT:    ; return to shader part epilog
  %load = call { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  %data = extractvalue { <4 x float>, i32 } %load, 0
  store <4 x float> %data, ptr addrspace(1) %out
  %status = extractvalue { <4 x float>, i32 } %load, 1
  %fstatus = bitcast i32 %status to float
  ret float %fstatus
}

define amdgpu_cs float @buffer_load_v3i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v3i32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_mov_b32_e32 v3, v2
; GFX6-NEXT:    v_mov_b32_e32 v4, v2
; GFX6-NEXT:    v_mov_b32_e32 v5, v2
; GFX6-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8
; GFX6-NEXT:    buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v5
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v3i32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v5, v2
; GFX8PLUS-NEXT:    buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx3 v[0:1], v[2:4]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v5
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v3i32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v3, v2
; GFX11-NEXT:    v_mov_b32_e32 v4, v2
; GFX11-NEXT:    v_mov_b32_e32 v5, v2
811; GFX11-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe 812; GFX11-NEXT: s_waitcnt vmcnt(0) 813; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off 814; GFX11-NEXT: v_mov_b32_e32 v0, v5 815; GFX11-NEXT: ; return to shader part epilog 816; 817; NOPRT-LABEL: buffer_load_v3i32_tfe: 818; NOPRT: ; %bb.0: 819; NOPRT-NEXT: v_mov_b32_e32 v5, 0 820; NOPRT-NEXT: buffer_load_format_xyz v[2:5], v5, s[0:3], 0 idxen tfe 821; NOPRT-NEXT: s_waitcnt vmcnt(0) 822; NOPRT-NEXT: global_store_b96 v[0:1], v[2:4], off 823; NOPRT-NEXT: v_mov_b32_e32 v0, v5 824; NOPRT-NEXT: ; return to shader part epilog 825 %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) 826 %data = extractvalue { <3 x i32>, i32 } %load, 0 827 store <3 x i32> %data, ptr addrspace(1) %out 828 %status = extractvalue { <3 x i32>, i32 } %load, 1 829 %fstatus = bitcast i32 %status to float 830 ret float %fstatus 831} 832 833define amdgpu_cs float @buffer_load_v3f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) { 834; GFX6-LABEL: buffer_load_v3f32_tfe: 835; GFX6: ; %bb.0: 836; GFX6-NEXT: v_mov_b32_e32 v2, 0 837; GFX6-NEXT: v_mov_b32_e32 v3, v2 838; GFX6-NEXT: v_mov_b32_e32 v4, v2 839; GFX6-NEXT: v_mov_b32_e32 v5, v2 840; GFX6-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe 841; GFX6-NEXT: s_mov_b32 s2, 0 842; GFX6-NEXT: s_mov_b32 s3, 0xf000 843; GFX6-NEXT: s_mov_b32 s0, s2 844; GFX6-NEXT: s_mov_b32 s1, s2 845; GFX6-NEXT: s_waitcnt vmcnt(0) 846; GFX6-NEXT: buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8 847; GFX6-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 848; GFX6-NEXT: v_mov_b32_e32 v0, v5 849; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 850; GFX6-NEXT: ; return to shader part epilog 851; 852; GFX8PLUS-LABEL: buffer_load_v3f32_tfe: 853; GFX8PLUS: ; %bb.0: 854; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0 855; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2 856; GFX8PLUS-NEXT: v_mov_b32_e32 v4, v2 
857; GFX8PLUS-NEXT: v_mov_b32_e32 v5, v2 858; GFX8PLUS-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe 859; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 860; GFX8PLUS-NEXT: flat_store_dwordx3 v[0:1], v[2:4] 861; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v5 862; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 863; GFX8PLUS-NEXT: ; return to shader part epilog 864; 865; GFX11-LABEL: buffer_load_v3f32_tfe: 866; GFX11: ; %bb.0: 867; GFX11-NEXT: v_mov_b32_e32 v2, 0 868; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 869; GFX11-NEXT: v_mov_b32_e32 v3, v2 870; GFX11-NEXT: v_mov_b32_e32 v4, v2 871; GFX11-NEXT: v_mov_b32_e32 v5, v2 872; GFX11-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe 873; GFX11-NEXT: s_waitcnt vmcnt(0) 874; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off 875; GFX11-NEXT: v_mov_b32_e32 v0, v5 876; GFX11-NEXT: ; return to shader part epilog 877; 878; NOPRT-LABEL: buffer_load_v3f32_tfe: 879; NOPRT: ; %bb.0: 880; NOPRT-NEXT: v_mov_b32_e32 v5, 0 881; NOPRT-NEXT: buffer_load_format_xyz v[2:5], v5, s[0:3], 0 idxen tfe 882; NOPRT-NEXT: s_waitcnt vmcnt(0) 883; NOPRT-NEXT: global_store_b96 v[0:1], v[2:4], off 884; NOPRT-NEXT: v_mov_b32_e32 v0, v5 885; NOPRT-NEXT: ; return to shader part epilog 886 %load = call { <3 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) 887 %data = extractvalue { <3 x float>, i32 } %load, 0 888 store <3 x float> %data, ptr addrspace(1) %out 889 %status = extractvalue { <3 x float>, i32 } %load, 1 890 %fstatus = bitcast i32 %status to float 891 ret float %fstatus 892} 893 894define amdgpu_cs float @buffer_load_v2i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) { 895; GFX6-LABEL: buffer_load_v2i32_tfe: 896; GFX6: ; %bb.0: 897; GFX6-NEXT: v_mov_b32_e32 v2, 0 898; GFX6-NEXT: v_mov_b32_e32 v3, v2 899; GFX6-NEXT: v_mov_b32_e32 v4, v2 900; GFX6-NEXT: v_mov_b32_e32 v5, v2 901; GFX6-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe 902; GFX6-NEXT: s_mov_b32 
s2, 0 903; GFX6-NEXT: s_mov_b32 s3, 0xf000 904; GFX6-NEXT: s_mov_b32 s0, s2 905; GFX6-NEXT: s_mov_b32 s1, s2 906; GFX6-NEXT: s_waitcnt vmcnt(0) 907; GFX6-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 908; GFX6-NEXT: v_mov_b32_e32 v0, v4 909; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 910; GFX6-NEXT: ; return to shader part epilog 911; 912; GFX8PLUS-LABEL: buffer_load_v2i32_tfe: 913; GFX8PLUS: ; %bb.0: 914; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0 915; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2 916; GFX8PLUS-NEXT: v_mov_b32_e32 v4, v2 917; GFX8PLUS-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe 918; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 919; GFX8PLUS-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 920; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v4 921; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 922; GFX8PLUS-NEXT: ; return to shader part epilog 923; 924; GFX11-LABEL: buffer_load_v2i32_tfe: 925; GFX11: ; %bb.0: 926; GFX11-NEXT: v_mov_b32_e32 v2, 0 927; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 928; GFX11-NEXT: v_mov_b32_e32 v3, v2 929; GFX11-NEXT: v_mov_b32_e32 v4, v2 930; GFX11-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe 931; GFX11-NEXT: s_waitcnt vmcnt(0) 932; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off 933; GFX11-NEXT: v_mov_b32_e32 v0, v4 934; GFX11-NEXT: ; return to shader part epilog 935; 936; NOPRT-LABEL: buffer_load_v2i32_tfe: 937; NOPRT: ; %bb.0: 938; NOPRT-NEXT: v_mov_b32_e32 v4, 0 939; NOPRT-NEXT: buffer_load_format_xy v[2:4], v4, s[0:3], 0 idxen tfe 940; NOPRT-NEXT: s_waitcnt vmcnt(0) 941; NOPRT-NEXT: global_store_b64 v[0:1], v[2:3], off 942; NOPRT-NEXT: v_mov_b32_e32 v0, v4 943; NOPRT-NEXT: ; return to shader part epilog 944 %load = call { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) 945 %data = extractvalue { <2 x i32>, i32 } %load, 0 946 store <2 x i32> %data, ptr addrspace(1) %out 947 %status = extractvalue { <2 x i32>, i32 } %load, 1 948 %fstatus = bitcast i32 %status 
to float 949 ret float %fstatus 950} 951 952define amdgpu_cs float @buffer_load_v2f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) { 953; GFX6-LABEL: buffer_load_v2f32_tfe: 954; GFX6: ; %bb.0: 955; GFX6-NEXT: v_mov_b32_e32 v2, 0 956; GFX6-NEXT: v_mov_b32_e32 v3, v2 957; GFX6-NEXT: v_mov_b32_e32 v4, v2 958; GFX6-NEXT: v_mov_b32_e32 v5, v2 959; GFX6-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe 960; GFX6-NEXT: s_mov_b32 s2, 0 961; GFX6-NEXT: s_mov_b32 s3, 0xf000 962; GFX6-NEXT: s_mov_b32 s0, s2 963; GFX6-NEXT: s_mov_b32 s1, s2 964; GFX6-NEXT: s_waitcnt vmcnt(0) 965; GFX6-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 966; GFX6-NEXT: v_mov_b32_e32 v0, v4 967; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 968; GFX6-NEXT: ; return to shader part epilog 969; 970; GFX8PLUS-LABEL: buffer_load_v2f32_tfe: 971; GFX8PLUS: ; %bb.0: 972; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0 973; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2 974; GFX8PLUS-NEXT: v_mov_b32_e32 v4, v2 975; GFX8PLUS-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe 976; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 977; GFX8PLUS-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 978; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v4 979; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 980; GFX8PLUS-NEXT: ; return to shader part epilog 981; 982; GFX11-LABEL: buffer_load_v2f32_tfe: 983; GFX11: ; %bb.0: 984; GFX11-NEXT: v_mov_b32_e32 v2, 0 985; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 986; GFX11-NEXT: v_mov_b32_e32 v3, v2 987; GFX11-NEXT: v_mov_b32_e32 v4, v2 988; GFX11-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe 989; GFX11-NEXT: s_waitcnt vmcnt(0) 990; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off 991; GFX11-NEXT: v_mov_b32_e32 v0, v4 992; GFX11-NEXT: ; return to shader part epilog 993; 994; NOPRT-LABEL: buffer_load_v2f32_tfe: 995; NOPRT: ; %bb.0: 996; NOPRT-NEXT: v_mov_b32_e32 v4, 0 997; NOPRT-NEXT: buffer_load_format_xy v[2:4], v4, s[0:3], 0 idxen tfe 998; NOPRT-NEXT: s_waitcnt vmcnt(0) 999; NOPRT-NEXT: 
global_store_b64 v[0:1], v[2:3], off 1000; NOPRT-NEXT: v_mov_b32_e32 v0, v4 1001; NOPRT-NEXT: ; return to shader part epilog 1002 %load = call { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) 1003 %data = extractvalue { <2 x float>, i32 } %load, 0 1004 store <2 x float> %data, ptr addrspace(1) %out 1005 %status = extractvalue { <2 x float>, i32 } %load, 1 1006 %fstatus = bitcast i32 %status to float 1007 ret float %fstatus 1008} 1009 1010define amdgpu_cs float @buffer_load_i32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) { 1011; GFX6-LABEL: buffer_load_i32_tfe: 1012; GFX6: ; %bb.0: 1013; GFX6-NEXT: v_mov_b32_e32 v2, 0 1014; GFX6-NEXT: v_mov_b32_e32 v3, v2 1015; GFX6-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe 1016; GFX6-NEXT: s_mov_b32 s2, 0 1017; GFX6-NEXT: s_mov_b32 s3, 0xf000 1018; GFX6-NEXT: s_mov_b32 s0, s2 1019; GFX6-NEXT: s_mov_b32 s1, s2 1020; GFX6-NEXT: s_waitcnt vmcnt(0) 1021; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 1022; GFX6-NEXT: v_mov_b32_e32 v0, v3 1023; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1024; GFX6-NEXT: ; return to shader part epilog 1025; 1026; GFX8PLUS-LABEL: buffer_load_i32_tfe: 1027; GFX8PLUS: ; %bb.0: 1028; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0 1029; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2 1030; GFX8PLUS-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe 1031; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 1032; GFX8PLUS-NEXT: flat_store_dword v[0:1], v2 1033; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v3 1034; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 1035; GFX8PLUS-NEXT: ; return to shader part epilog 1036; 1037; GFX11-LABEL: buffer_load_i32_tfe: 1038; GFX11: ; %bb.0: 1039; GFX11-NEXT: v_mov_b32_e32 v2, 0 1040; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1041; GFX11-NEXT: v_mov_b32_e32 v3, v2 1042; GFX11-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe 1043; GFX11-NEXT: s_waitcnt vmcnt(0) 1044; GFX11-NEXT: global_store_b32 v[0:1], 
v2, off 1045; GFX11-NEXT: v_mov_b32_e32 v0, v3 1046; GFX11-NEXT: ; return to shader part epilog 1047; 1048; NOPRT-LABEL: buffer_load_i32_tfe: 1049; NOPRT: ; %bb.0: 1050; NOPRT-NEXT: v_mov_b32_e32 v3, 0 1051; NOPRT-NEXT: buffer_load_format_x v[2:3], v3, s[0:3], 0 idxen tfe 1052; NOPRT-NEXT: s_waitcnt vmcnt(0) 1053; NOPRT-NEXT: global_store_b32 v[0:1], v2, off 1054; NOPRT-NEXT: v_mov_b32_e32 v0, v3 1055; NOPRT-NEXT: ; return to shader part epilog 1056 %load = call { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) 1057 %data = extractvalue { i32, i32 } %load, 0 1058 store i32 %data, ptr addrspace(1) %out 1059 %status = extractvalue { i32, i32 } %load, 1 1060 %fstatus = bitcast i32 %status to float 1061 ret float %fstatus 1062} 1063 1064define amdgpu_cs float @buffer_load_f32_tfe(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %out) { 1065; GFX6-LABEL: buffer_load_f32_tfe: 1066; GFX6: ; %bb.0: 1067; GFX6-NEXT: v_mov_b32_e32 v2, 0 1068; GFX6-NEXT: v_mov_b32_e32 v3, v2 1069; GFX6-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe 1070; GFX6-NEXT: s_mov_b32 s2, 0 1071; GFX6-NEXT: s_mov_b32 s3, 0xf000 1072; GFX6-NEXT: s_mov_b32 s0, s2 1073; GFX6-NEXT: s_mov_b32 s1, s2 1074; GFX6-NEXT: s_waitcnt vmcnt(0) 1075; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 1076; GFX6-NEXT: v_mov_b32_e32 v0, v3 1077; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1078; GFX6-NEXT: ; return to shader part epilog 1079; 1080; GFX8PLUS-LABEL: buffer_load_f32_tfe: 1081; GFX8PLUS: ; %bb.0: 1082; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0 1083; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2 1084; GFX8PLUS-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe 1085; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 1086; GFX8PLUS-NEXT: flat_store_dword v[0:1], v2 1087; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v3 1088; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 1089; GFX8PLUS-NEXT: ; return to shader part epilog 1090; 1091; GFX11-LABEL: buffer_load_f32_tfe: 1092; 
GFX11: ; %bb.0: 1093; GFX11-NEXT: v_mov_b32_e32 v2, 0 1094; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1095; GFX11-NEXT: v_mov_b32_e32 v3, v2 1096; GFX11-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe 1097; GFX11-NEXT: s_waitcnt vmcnt(0) 1098; GFX11-NEXT: global_store_b32 v[0:1], v2, off 1099; GFX11-NEXT: v_mov_b32_e32 v0, v3 1100; GFX11-NEXT: ; return to shader part epilog 1101; 1102; NOPRT-LABEL: buffer_load_f32_tfe: 1103; NOPRT: ; %bb.0: 1104; NOPRT-NEXT: v_mov_b32_e32 v3, 0 1105; NOPRT-NEXT: buffer_load_format_x v[2:3], v3, s[0:3], 0 idxen tfe 1106; NOPRT-NEXT: s_waitcnt vmcnt(0) 1107; NOPRT-NEXT: global_store_b32 v[0:1], v2, off 1108; NOPRT-NEXT: v_mov_b32_e32 v0, v3 1109; NOPRT-NEXT: ; return to shader part epilog 1110 %load = call { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0) 1111 %data = extractvalue { float, i32 } %load, 0 1112 store float %data, ptr addrspace(1) %out 1113 %status = extractvalue { float, i32 } %load, 1 1114 %fstatus = bitcast i32 %status to float 1115 ret float %fstatus 1116} 1117 1118declare float @llvm.amdgcn.struct.ptr.buffer.load.format.f32(ptr addrspace(8), i32, i32, i32, i32) #0 1119declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v2f32(ptr addrspace(8), i32, i32, i32, i32) #0 1120declare <4 x float> @llvm.amdgcn.struct.ptr.buffer.load.format.v4f32(ptr addrspace(8), i32, i32, i32, i32) #0 1121declare i32 @llvm.amdgcn.struct.ptr.buffer.load.format.i32(ptr addrspace(8), i32, i32, i32, i32) #0 1122declare { <4 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 1123declare { <4 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v4f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 1124declare { <3 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 1125declare { <3 x float>, i32 } 
@llvm.amdgcn.struct.ptr.buffer.load.format.sl_v3f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 1126declare { <2 x i32>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 1127declare { <2 x float>, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_v2f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 1128declare { i32, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_i32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 1129declare { float, i32 } @llvm.amdgcn.struct.ptr.buffer.load.format.sl_f32i32s(ptr addrspace(8), i32, i32, i32, i32 immarg) #0 1130attributes #0 = { nounwind readonly } 1131