; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefixes=GFX6 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=GFX8PLUS %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck --check-prefixes=GFX11 %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=-enable-prt-strict-null -verify-machineinstrs | FileCheck --check-prefixes=NOPRT %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck --check-prefixes=GFX12,GFX12-SDAG %s
;RUN: llc < %s -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck --check-prefixes=GFX12,GFX12-GISEL %s

; Codegen checks for the llvm.amdgcn.struct.buffer.load.format.* intrinsics.
; The calls below pass (rsrc, index, offset, soffset, cache-policy) in that
; order. Each function is checked for verde, tonga, gfx1100 (default and with
; enable-prt-strict-null disabled) and gfx1200 (default selector and
; -global-isel).

; Three loads with zero index/offset and cache-policy values 0, 1 and 2.
; Pre-GFX12 targets print these as no modifier, glc and slc; GFX12 prints
; no modifier, th:TH_LOAD_NT and th:TH_LOAD_HT.
define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(<4 x i32> inreg) {
; GFX6-LABEL: buffer_load:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v8, 0
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
; GFX6-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
; GFX6-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v8, 0
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v8, 0
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
; GFX11-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
; GFX11-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v8, 0
; NOPRT-NEXT:    s_clause 0x2
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 0 idxen
; NOPRT-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], 0 idxen glc
; NOPRT-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], 0 idxen slc
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v8, 0
; GFX12-NEXT:    s_clause 0x2
; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], null idxen
; GFX12-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], null idxen th:TH_LOAD_NT
; GFX12-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], null idxen th:TH_LOAD_HT
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 0)
  %data_glc = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 1)
  %data_slc = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 0, i32 0, i32 2)
  ; Every field of the aggregate is overwritten below, so the seed value is
  ; never observed; poison is used instead of the deprecated undef.
  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} poison, <4 x float> %data, 0
  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
  ret {<4 x float>, <4 x float>, <4 x float>} %r2
}

; A small constant offset (42) is emitted as the instruction's offset:
; immediate on every target.
define amdgpu_ps <4 x float> @buffer_load_immoffs(<4 x i32> inreg) {
; GFX6-LABEL: buffer_load_immoffs:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_immoffs:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_immoffs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_immoffs:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:42
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_immoffs:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], null idxen offset:42
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 42, i32 0, i32 0)
  ret <4 x float> %data
}

; Three loads with constant soffset values 60, 32764 (0x7ffc) and 36860
; (0x8ffc), whose results are summed. Pre-GFX12 targets use 60 directly as an
; operand and move the larger values into an SGPR; GFX12 moves all three into
; SGPRs.
define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) {
; GFX6-LABEL: buffer_load_immoffs_large:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v8, 0
; GFX6-NEXT:    s_movk_i32 s4, 0x7ffc
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
; GFX6-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
; GFX6-NEXT:    s_mov_b32 s4, 0x8ffc
; GFX6-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
; GFX6-NEXT:    s_waitcnt vmcnt(1)
; GFX6-NEXT:    v_add_f32_e32 v3, v3, v7
; GFX6-NEXT:    v_add_f32_e32 v2, v2, v6
; GFX6-NEXT:    v_add_f32_e32 v1, v1, v5
; GFX6-NEXT:    v_add_f32_e32 v0, v0, v4
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    v_add_f32_e32 v0, v8, v0
; GFX6-NEXT:    v_add_f32_e32 v1, v9, v1
; GFX6-NEXT:    v_add_f32_e32 v2, v10, v2
; GFX6-NEXT:    v_add_f32_e32 v3, v11, v3
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_immoffs_large:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v8, 0
; GFX8PLUS-NEXT:    s_movk_i32 s4, 0x7ffc
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0x8ffc
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(1)
; GFX8PLUS-NEXT:    v_add_f32_e32 v3, v3, v7
; GFX8PLUS-NEXT:    v_add_f32_e32 v2, v2, v6
; GFX8PLUS-NEXT:    v_add_f32_e32 v1, v1, v5
; GFX8PLUS-NEXT:    v_add_f32_e32 v0, v0, v4
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    v_add_f32_e32 v0, v8, v0
; GFX8PLUS-NEXT:    v_add_f32_e32 v1, v9, v1
; GFX8PLUS-NEXT:    v_add_f32_e32 v2, v10, v2
; GFX8PLUS-NEXT:    v_add_f32_e32 v3, v11, v3
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_immoffs_large:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v8, 0
; GFX11-NEXT:    s_movk_i32 s4, 0x7ffc
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
; GFX11-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
; GFX11-NEXT:    s_mov_b32 s4, 0x8ffc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_add_f32_e32 v1, v1, v5
; GFX11-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
; GFX11-NEXT:    v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v3, v3, v7
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v1, v9, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_dual_add_f32 v0, v8, v0 :: v_dual_add_f32 v3, v11, v3
; GFX11-NEXT:    v_add_f32_e32 v2, v10, v2
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_immoffs_large:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v8, 0
; NOPRT-NEXT:    s_movk_i32 s4, 0x7ffc
; NOPRT-NEXT:    s_clause 0x1
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], 60 idxen offset:4092
; NOPRT-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s4 idxen offset:4092
; NOPRT-NEXT:    s_mov_b32 s4, 0x8ffc
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    v_add_f32_e32 v1, v1, v5
; NOPRT-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
; NOPRT-NEXT:    v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v3, v3, v7
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v1, v9, v1
; NOPRT-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; NOPRT-NEXT:    v_dual_add_f32 v0, v8, v0 :: v_dual_add_f32 v3, v11, v3
; NOPRT-NEXT:    v_add_f32_e32 v2, v10, v2
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_immoffs_large:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v8, 0
; GFX12-NEXT:    s_mov_b32 s4, 60
; GFX12-NEXT:    s_movk_i32 s5, 0x7ffc
; GFX12-NEXT:    s_clause 0x1
; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v8, s[0:3], s4 idxen offset:4092
; GFX12-NEXT:    buffer_load_format_xyzw v[4:7], v8, s[0:3], s5 idxen offset:4092
; GFX12-NEXT:    s_mov_b32 s4, 0x8ffc
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_add_f32_e32 v1, v1, v5
; GFX12-NEXT:    buffer_load_format_xyzw v[8:11], v8, s[0:3], s4 idxen offset:4
; GFX12-NEXT:    v_dual_add_f32 v0, v0, v4 :: v_dual_add_f32 v3, v3, v7
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    v_dual_add_f32 v2, v2, v6 :: v_dual_add_f32 v1, v9, v1
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX12-NEXT:    v_dual_add_f32 v0, v8, v0 :: v_dual_add_f32 v3, v11, v3
; GFX12-NEXT:    v_add_f32_e32 v2, v10, v2
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %d.0 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4092, i32 60, i32 0)
  %d.1 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4092, i32 32764, i32 0)
  %d.2 = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4, i32 36860, i32 0)
  %d.3 = fadd <4 x float> %d.0, %d.1
  %data = fadd <4 x float> %d.2, %d.3
  ret <4 x float> %data
}

; Constant offset 4092 is emitted as offset:4092 on every target.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(<4 x i32> inreg) {
; GFX6-LABEL: buffer_load_voffset_large_12bit:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_voffset_large_12bit:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_12bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_voffset_large_12bit:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen offset:4092
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_voffset_large_12bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], null idxen offset:4092
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 4092, i32 0, i32 0)
  ret <4 x float> %data
}

; Constant offset 8188: pre-GFX12 targets split it into 0x1000 in a VGPR
; (offen) plus offset:4092; GFX12 emits offset:8188 directly.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(<4 x i32> inreg) {
; GFX6-LABEL: buffer_load_voffset_large_13bit:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, 0x1000
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_voffset_large_13bit:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0x1000
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_13bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_voffset_large_13bit:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    s_mov_b32 s4, 0
; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; NOPRT-NEXT:    v_dual_mov_b32 v1, 0x1000 :: v_dual_mov_b32 v0, s4
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_voffset_large_13bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], null idxen offset:8188
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 8188, i32 0, i32 0)
  ret <4 x float> %data
}

; Constant offset 65532: pre-GFX12 targets split it into 0xf000 in a VGPR
; plus offset:4092; GFX12 emits offset:65532 directly.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(<4 x i32> inreg) {
; GFX6-LABEL: buffer_load_voffset_large_16bit:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, 0xf000
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_voffset_large_16bit:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0xf000
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_16bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_voffset_large_16bit:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    s_mov_b32 s4, 0
; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; NOPRT-NEXT:    v_dual_mov_b32 v1, 0xf000 :: v_dual_mov_b32 v0, s4
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_voffset_large_16bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], null idxen offset:65532
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 65532, i32 0, i32 0)
  ret <4 x float> %data
}

; Constant offset 8388604: pre-GFX12 targets split it into 0x7ff000 in a VGPR
; plus offset:4092; GFX12 emits offset:8388604 directly.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(<4 x i32> inreg) {
; GFX6-LABEL: buffer_load_voffset_large_23bit:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, 0x7ff000
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_voffset_large_23bit:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0x7ff000
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_23bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_voffset_large_23bit:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    s_mov_b32 s4, 0
; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; NOPRT-NEXT:    v_dual_mov_b32 v1, 0x7ff000 :: v_dual_mov_b32 v0, s4
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_voffset_large_23bit:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], null idxen offset:8388604
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 8388604, i32 0, i32 0)
  ret <4 x float> %data
}

; Constant offset 16777212: pre-GFX12 targets split it into 0xfff000 in a VGPR
; plus offset:4092; GFX12 also has to split, into 0x800000 in a VGPR plus
; offset:8388604. The SDAG and GlobalISel outputs differ only in the operand
; order of the v_dual_mov_b32.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(<4 x i32> inreg) {
; GFX6-LABEL: buffer_load_voffset_large_24bit:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, 0xfff000
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_voffset_large_24bit:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, 0xfff000
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_24bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_voffset_large_24bit:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    s_mov_b32 s4, 0
; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; NOPRT-NEXT:    v_dual_mov_b32 v1, 0xfff000 :: v_dual_mov_b32 v0, s4
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:4092
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-SDAG-LABEL: buffer_load_voffset_large_24bit:
; GFX12-SDAG:       ; %bb.0: ; %main_body
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 0x800000 :: v_dual_mov_b32 v0, 0
; GFX12-SDAG-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null idxen offen offset:8388604
; GFX12-SDAG-NEXT:    s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT:    ; return to shader part epilog
;
; GFX12-GISEL-LABEL: buffer_load_voffset_large_24bit:
; GFX12-GISEL:       ; %bb.0: ; %main_body
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x800000
; GFX12-GISEL-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null idxen offen offset:8388604
; GFX12-GISEL-NEXT:    s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 16777212, i32 0, i32 0)
  ret <4 x float> %data
}

; Variable index with zero offset: the incoming VGPR is used directly with
; idxen only.
define amdgpu_ps <4 x float> @buffer_load_idx(<4 x i32> inreg, i32) {
; GFX6-LABEL: buffer_load_idx:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_idx:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_idx:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_idx:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_idx:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v0, s[0:3], null idxen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 %1, i32 0, i32 0, i32 0)
  ret <4 x float> %data
}

; Variable offset with zero index: a v[0:1] pair is built (zero index, then
; the offset) and the load uses idxen offen.
define amdgpu_ps <4 x float> @buffer_load_ofs(<4 x i32> inreg, i32) {
; GFX6-LABEL: buffer_load_ofs:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_ofs:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_ofs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_ofs:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    s_mov_b32 s4, 0
; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; NOPRT-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_ofs:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 0
; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null idxen offen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 %1, i32 0, i32 0)
  ret <4 x float> %data
}

; Variable offset plus constant 60: the add is folded into offset:60 and no
; separate add instruction is emitted.
define amdgpu_ps <4 x float> @buffer_load_ofs_imm(<4 x i32> inreg, i32) {
; GFX6-LABEL: buffer_load_ofs_imm:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s4, 0
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, s4
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_ofs_imm:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    s_mov_b32 s4, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, s4
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_ofs_imm:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_mov_b32 s4, 0
; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_ofs_imm:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    s_mov_b32 s4, 0
; NOPRT-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; NOPRT-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen offset:60
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_ofs_imm:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 0
; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null idxen offen offset:60
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %ofs = add i32 %1, 60
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 0, i32 %ofs, i32 0, i32 0)
  ret <4 x float> %data
}

; Variable index (%1) and offset (%2) already sit in v[0:1] in the order the
; instruction expects, so no copies are needed.
define amdgpu_ps <4 x float> @buffer_load_both(<4 x i32> inreg, i32, i32) {
; GFX6-LABEL: buffer_load_both:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_both:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_both:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_both:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], 0 idxen offen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_both:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v[0:1], s[0:3], null idxen offen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 %1, i32 %2, i32 0, i32 0)
  ret <4 x float> %data
}

; Same as buffer_load_both but with index and offset swapped (%2 is the
; index, %1 the offset): one copy is emitted so the pair becomes v[1:2].
define amdgpu_ps <4 x float> @buffer_load_both_reversed(<4 x i32> inreg, i32, i32) {
; GFX6-LABEL: buffer_load_both_reversed:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_both_reversed:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, v0
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_both_reversed:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v2, v0
; GFX11-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_both_reversed:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v2, v0
; NOPRT-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], 0 idxen offen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_both_reversed:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v2, v0
; GFX12-NEXT:    buffer_load_format_xyzw v[0:3], v[1:2], s[0:3], null idxen offen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32> %0, i32 %2, i32 %1, i32 0, i32 0)
  ret <4 x float> %data
}

; Scalar float result selects buffer_load_format_x.
define amdgpu_ps float @buffer_load_x(<4 x i32> inreg %rsrc) {
; GFX6-LABEL: buffer_load_x:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_x:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_x:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_x:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
; NOPRT-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_x:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    buffer_load_format_x v0, v0, s[0:3], null idxen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  ret float %data
}

; The i32 overload (bitcast to float for the return) produces the same
; buffer_load_format_x as the float overload.
define amdgpu_ps float @buffer_load_x_i32(<4 x i32> inreg %rsrc) {
; GFX6-LABEL: buffer_load_x_i32:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_x_i32:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_x_i32:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_x_i32:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
; NOPRT-NEXT:    buffer_load_format_x v0, v0, s[0:3], 0 idxen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_x_i32:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    buffer_load_format_x v0, v0, s[0:3], null idxen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  %fdata = bitcast i32 %data to float
  ret float %fdata
}

; <2 x float> result selects buffer_load_format_xy.
define amdgpu_ps <2 x float> @buffer_load_xy(<4 x i32> inreg %rsrc) {
; GFX6-LABEL: buffer_load_xy:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_xy:
; GFX8PLUS:       ; %bb.0: ; %main_body
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, 0
; GFX8PLUS-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_xy:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_xy:
; NOPRT:       ; %bb.0: ; %main_body
; NOPRT-NEXT:    v_mov_b32_e32 v0, 0
; NOPRT-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], 0 idxen
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_xy:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    buffer_load_format_xy v[0:1], v0, s[0:3], null idxen
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %data = call <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  ret <2 x float> %data
}

define amdgpu_cs float @buffer_load_v4i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) {
; GFX6-LABEL: buffer_load_v4i32_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_mov_b32_e32 v7, 2
; GFX6-NEXT:    v_mov_b32_e32 v3, v2
; GFX6-NEXT:    v_mov_b32_e32 v4, v2
; GFX6-NEXT:    v_mov_b32_e32 v5, v2
; GFX6-NEXT:    v_mov_b32_e32 v6, v2
; GFX6-NEXT:    buffer_load_format_xyzw v[2:6], v7, s[0:3], 0 idxen tfe
; GFX6-NEXT:    s_mov_b32 s2, 0
; GFX6-NEXT:    s_mov_b32 s3, 0xf000
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s2
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64
; GFX6-NEXT:    v_mov_b32_e32 v0, v6
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8PLUS-LABEL: buffer_load_v4i32_tfe:
; GFX8PLUS:       ; %bb.0:
; GFX8PLUS-NEXT:    v_mov_b32_e32 v2, 0
; GFX8PLUS-NEXT:    v_mov_b32_e32 v7, 2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v3, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v4, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v5, v2
; GFX8PLUS-NEXT:    v_mov_b32_e32 v6, v2
; GFX8PLUS-NEXT:    buffer_load_format_xyzw v[2:6], v7, s[0:3], 0 idxen tfe
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    flat_store_dwordx4 v[0:1], v[2:5]
; GFX8PLUS-NEXT:    v_mov_b32_e32 v0, v6
; GFX8PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX8PLUS-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_v4i32_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v7, 2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_mov_b32_e32 v3, v2
; GFX11-NEXT:    v_mov_b32_e32 v4, v2
; GFX11-NEXT:    v_mov_b32_e32 v5, v2
; GFX11-NEXT:    v_mov_b32_e32 v6, v2
; GFX11-NEXT:    buffer_load_format_xyzw v[2:6], v7, s[0:3], 0 idxen tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX11-NEXT:    v_mov_b32_e32 v0, v6
; GFX11-NEXT:    ; return to shader part epilog
;
; NOPRT-LABEL: buffer_load_v4i32_tfe:
; NOPRT:       ; %bb.0:
; NOPRT-NEXT:    v_mov_b32_e32 v2, 2
; NOPRT-NEXT:    v_mov_b32_e32 v6, 0
; NOPRT-NEXT:    buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe
; NOPRT-NEXT:    s_waitcnt vmcnt(0)
; NOPRT-NEXT:    global_store_b128 v[0:1], v[2:5], off
; NOPRT-NEXT:    v_mov_b32_e32 v0, v6
; NOPRT-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: buffer_load_v4i32_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v7, 2
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT:    v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v4, v2
; GFX12-NEXT:    v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v6, v2
; GFX12-NEXT:    buffer_load_format_xyzw v[2:6], v7, s[0:3], null idxen tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b128 v[0:1], v[2:5], off
; GFX12-NEXT:    v_mov_b32_e32 v0, v6
; GFX12-NEXT:    ; return to shader part epilog
  %load = call { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32> %rsrc, i32 2, i32 0, i32 0, i32 0)
  %data = extractvalue { <4 x i32>, i32 } %load, 0
store <4 x i32> %data, ptr addrspace(1) %out 854 %status = extractvalue { <4 x i32>, i32 } %load, 1 855 %fstatus = bitcast i32 %status to float 856 ret float %fstatus 857} 858 859define amdgpu_cs float @buffer_load_v4f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) { 860; GFX6-LABEL: buffer_load_v4f32_tfe: 861; GFX6: ; %bb.0: 862; GFX6-NEXT: v_mov_b32_e32 v2, 0 863; GFX6-NEXT: v_mov_b32_e32 v3, v2 864; GFX6-NEXT: v_mov_b32_e32 v4, v2 865; GFX6-NEXT: v_mov_b32_e32 v5, v2 866; GFX6-NEXT: v_mov_b32_e32 v6, v2 867; GFX6-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe 868; GFX6-NEXT: s_mov_b32 s2, 0 869; GFX6-NEXT: s_mov_b32 s3, 0xf000 870; GFX6-NEXT: s_mov_b32 s0, s2 871; GFX6-NEXT: s_mov_b32 s1, s2 872; GFX6-NEXT: s_waitcnt vmcnt(0) 873; GFX6-NEXT: buffer_store_dwordx4 v[2:5], v[0:1], s[0:3], 0 addr64 874; GFX6-NEXT: v_mov_b32_e32 v0, v6 875; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 876; GFX6-NEXT: ; return to shader part epilog 877; 878; GFX8PLUS-LABEL: buffer_load_v4f32_tfe: 879; GFX8PLUS: ; %bb.0: 880; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0 881; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2 882; GFX8PLUS-NEXT: v_mov_b32_e32 v4, v2 883; GFX8PLUS-NEXT: v_mov_b32_e32 v5, v2 884; GFX8PLUS-NEXT: v_mov_b32_e32 v6, v2 885; GFX8PLUS-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe 886; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 887; GFX8PLUS-NEXT: flat_store_dwordx4 v[0:1], v[2:5] 888; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v6 889; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 890; GFX8PLUS-NEXT: ; return to shader part epilog 891; 892; GFX11-LABEL: buffer_load_v4f32_tfe: 893; GFX11: ; %bb.0: 894; GFX11-NEXT: v_mov_b32_e32 v2, 0 895; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 896; GFX11-NEXT: v_mov_b32_e32 v3, v2 897; GFX11-NEXT: v_mov_b32_e32 v4, v2 898; GFX11-NEXT: v_mov_b32_e32 v5, v2 899; GFX11-NEXT: v_mov_b32_e32 v6, v2 900; GFX11-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], 0 idxen tfe 901; GFX11-NEXT: s_waitcnt vmcnt(0) 902; GFX11-NEXT: global_store_b128 v[0:1], v[2:5], 
off 903; GFX11-NEXT: v_mov_b32_e32 v0, v6 904; GFX11-NEXT: ; return to shader part epilog 905; 906; NOPRT-LABEL: buffer_load_v4f32_tfe: 907; NOPRT: ; %bb.0: 908; NOPRT-NEXT: v_mov_b32_e32 v6, 0 909; NOPRT-NEXT: buffer_load_format_xyzw v[2:6], v6, s[0:3], 0 idxen tfe 910; NOPRT-NEXT: s_waitcnt vmcnt(0) 911; NOPRT-NEXT: global_store_b128 v[0:1], v[2:5], off 912; NOPRT-NEXT: v_mov_b32_e32 v0, v6 913; NOPRT-NEXT: ; return to shader part epilog 914; 915; GFX12-LABEL: buffer_load_v4f32_tfe: 916; GFX12: ; %bb.0: 917; GFX12-NEXT: v_mov_b32_e32 v2, 0 918; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 919; GFX12-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v4, v2 920; GFX12-NEXT: v_dual_mov_b32 v5, v2 :: v_dual_mov_b32 v6, v2 921; GFX12-NEXT: buffer_load_format_xyzw v[2:6], v2, s[0:3], null idxen tfe 922; GFX12-NEXT: s_wait_loadcnt 0x0 923; GFX12-NEXT: global_store_b128 v[0:1], v[2:5], off 924; GFX12-NEXT: v_mov_b32_e32 v0, v6 925; GFX12-NEXT: ; return to shader part epilog 926 %load = call { <4 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) 927 %data = extractvalue { <4 x float>, i32 } %load, 0 928 store <4 x float> %data, ptr addrspace(1) %out 929 %status = extractvalue { <4 x float>, i32 } %load, 1 930 %fstatus = bitcast i32 %status to float 931 ret float %fstatus 932} 933 934define amdgpu_cs float @buffer_load_v3i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) { 935; GFX6-LABEL: buffer_load_v3i32_tfe: 936; GFX6: ; %bb.0: 937; GFX6-NEXT: v_mov_b32_e32 v2, 0 938; GFX6-NEXT: v_mov_b32_e32 v3, v2 939; GFX6-NEXT: v_mov_b32_e32 v4, v2 940; GFX6-NEXT: v_mov_b32_e32 v5, v2 941; GFX6-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe 942; GFX6-NEXT: s_mov_b32 s2, 0 943; GFX6-NEXT: s_mov_b32 s3, 0xf000 944; GFX6-NEXT: s_mov_b32 s0, s2 945; GFX6-NEXT: s_mov_b32 s1, s2 946; GFX6-NEXT: s_waitcnt vmcnt(0) 947; GFX6-NEXT: buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8 948; GFX6-NEXT: 
buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 949; GFX6-NEXT: v_mov_b32_e32 v0, v5 950; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 951; GFX6-NEXT: ; return to shader part epilog 952; 953; GFX8PLUS-LABEL: buffer_load_v3i32_tfe: 954; GFX8PLUS: ; %bb.0: 955; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0 956; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2 957; GFX8PLUS-NEXT: v_mov_b32_e32 v4, v2 958; GFX8PLUS-NEXT: v_mov_b32_e32 v5, v2 959; GFX8PLUS-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe 960; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 961; GFX8PLUS-NEXT: flat_store_dwordx3 v[0:1], v[2:4] 962; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v5 963; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 964; GFX8PLUS-NEXT: ; return to shader part epilog 965; 966; GFX11-LABEL: buffer_load_v3i32_tfe: 967; GFX11: ; %bb.0: 968; GFX11-NEXT: v_mov_b32_e32 v2, 0 969; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 970; GFX11-NEXT: v_mov_b32_e32 v3, v2 971; GFX11-NEXT: v_mov_b32_e32 v4, v2 972; GFX11-NEXT: v_mov_b32_e32 v5, v2 973; GFX11-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe 974; GFX11-NEXT: s_waitcnt vmcnt(0) 975; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off 976; GFX11-NEXT: v_mov_b32_e32 v0, v5 977; GFX11-NEXT: ; return to shader part epilog 978; 979; NOPRT-LABEL: buffer_load_v3i32_tfe: 980; NOPRT: ; %bb.0: 981; NOPRT-NEXT: v_mov_b32_e32 v5, 0 982; NOPRT-NEXT: buffer_load_format_xyz v[2:5], v5, s[0:3], 0 idxen tfe 983; NOPRT-NEXT: s_waitcnt vmcnt(0) 984; NOPRT-NEXT: global_store_b96 v[0:1], v[2:4], off 985; NOPRT-NEXT: v_mov_b32_e32 v0, v5 986; NOPRT-NEXT: ; return to shader part epilog 987; 988; GFX12-LABEL: buffer_load_v3i32_tfe: 989; GFX12: ; %bb.0: 990; GFX12-NEXT: v_mov_b32_e32 v2, 0 991; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 992; GFX12-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v4, v2 993; GFX12-NEXT: v_mov_b32_e32 v5, v2 994; GFX12-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], null idxen tfe 995; GFX12-NEXT: s_wait_loadcnt 0x0 996; GFX12-NEXT: global_store_b96 v[0:1], 
v[2:4], off 997; GFX12-NEXT: v_mov_b32_e32 v0, v5 998; GFX12-NEXT: ; return to shader part epilog 999 %load = call { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) 1000 %data = extractvalue { <3 x i32>, i32 } %load, 0 1001 store <3 x i32> %data, ptr addrspace(1) %out 1002 %status = extractvalue { <3 x i32>, i32 } %load, 1 1003 %fstatus = bitcast i32 %status to float 1004 ret float %fstatus 1005} 1006 1007define amdgpu_cs float @buffer_load_v3f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) { 1008; GFX6-LABEL: buffer_load_v3f32_tfe: 1009; GFX6: ; %bb.0: 1010; GFX6-NEXT: v_mov_b32_e32 v2, 0 1011; GFX6-NEXT: v_mov_b32_e32 v3, v2 1012; GFX6-NEXT: v_mov_b32_e32 v4, v2 1013; GFX6-NEXT: v_mov_b32_e32 v5, v2 1014; GFX6-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe 1015; GFX6-NEXT: s_mov_b32 s2, 0 1016; GFX6-NEXT: s_mov_b32 s3, 0xf000 1017; GFX6-NEXT: s_mov_b32 s0, s2 1018; GFX6-NEXT: s_mov_b32 s1, s2 1019; GFX6-NEXT: s_waitcnt vmcnt(0) 1020; GFX6-NEXT: buffer_store_dword v4, v[0:1], s[0:3], 0 addr64 offset:8 1021; GFX6-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 1022; GFX6-NEXT: v_mov_b32_e32 v0, v5 1023; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1024; GFX6-NEXT: ; return to shader part epilog 1025; 1026; GFX8PLUS-LABEL: buffer_load_v3f32_tfe: 1027; GFX8PLUS: ; %bb.0: 1028; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0 1029; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2 1030; GFX8PLUS-NEXT: v_mov_b32_e32 v4, v2 1031; GFX8PLUS-NEXT: v_mov_b32_e32 v5, v2 1032; GFX8PLUS-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe 1033; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 1034; GFX8PLUS-NEXT: flat_store_dwordx3 v[0:1], v[2:4] 1035; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v5 1036; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 1037; GFX8PLUS-NEXT: ; return to shader part epilog 1038; 1039; GFX11-LABEL: buffer_load_v3f32_tfe: 1040; GFX11: ; %bb.0: 1041; GFX11-NEXT: v_mov_b32_e32 v2, 0 1042; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_1) 1043; GFX11-NEXT: v_mov_b32_e32 v3, v2 1044; GFX11-NEXT: v_mov_b32_e32 v4, v2 1045; GFX11-NEXT: v_mov_b32_e32 v5, v2 1046; GFX11-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe 1047; GFX11-NEXT: s_waitcnt vmcnt(0) 1048; GFX11-NEXT: global_store_b96 v[0:1], v[2:4], off 1049; GFX11-NEXT: v_mov_b32_e32 v0, v5 1050; GFX11-NEXT: ; return to shader part epilog 1051; 1052; NOPRT-LABEL: buffer_load_v3f32_tfe: 1053; NOPRT: ; %bb.0: 1054; NOPRT-NEXT: v_mov_b32_e32 v5, 0 1055; NOPRT-NEXT: buffer_load_format_xyz v[2:5], v5, s[0:3], 0 idxen tfe 1056; NOPRT-NEXT: s_waitcnt vmcnt(0) 1057; NOPRT-NEXT: global_store_b96 v[0:1], v[2:4], off 1058; NOPRT-NEXT: v_mov_b32_e32 v0, v5 1059; NOPRT-NEXT: ; return to shader part epilog 1060; 1061; GFX12-LABEL: buffer_load_v3f32_tfe: 1062; GFX12: ; %bb.0: 1063; GFX12-NEXT: v_mov_b32_e32 v2, 0 1064; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1065; GFX12-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v4, v2 1066; GFX12-NEXT: v_mov_b32_e32 v5, v2 1067; GFX12-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], null idxen tfe 1068; GFX12-NEXT: s_wait_loadcnt 0x0 1069; GFX12-NEXT: global_store_b96 v[0:1], v[2:4], off 1070; GFX12-NEXT: v_mov_b32_e32 v0, v5 1071; GFX12-NEXT: ; return to shader part epilog 1072 %load = call { <3 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) 1073 %data = extractvalue { <3 x float>, i32 } %load, 0 1074 store <3 x float> %data, ptr addrspace(1) %out 1075 %status = extractvalue { <3 x float>, i32 } %load, 1 1076 %fstatus = bitcast i32 %status to float 1077 ret float %fstatus 1078} 1079 1080define amdgpu_cs float @buffer_load_v2i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) { 1081; GFX6-LABEL: buffer_load_v2i32_tfe: 1082; GFX6: ; %bb.0: 1083; GFX6-NEXT: v_mov_b32_e32 v2, 0 1084; GFX6-NEXT: v_mov_b32_e32 v3, v2 1085; GFX6-NEXT: v_mov_b32_e32 v4, v2 1086; GFX6-NEXT: v_mov_b32_e32 v5, v2 1087; GFX6-NEXT: 
buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe 1088; GFX6-NEXT: s_mov_b32 s2, 0 1089; GFX6-NEXT: s_mov_b32 s3, 0xf000 1090; GFX6-NEXT: s_mov_b32 s0, s2 1091; GFX6-NEXT: s_mov_b32 s1, s2 1092; GFX6-NEXT: s_waitcnt vmcnt(0) 1093; GFX6-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 1094; GFX6-NEXT: v_mov_b32_e32 v0, v4 1095; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1096; GFX6-NEXT: ; return to shader part epilog 1097; 1098; GFX8PLUS-LABEL: buffer_load_v2i32_tfe: 1099; GFX8PLUS: ; %bb.0: 1100; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0 1101; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2 1102; GFX8PLUS-NEXT: v_mov_b32_e32 v4, v2 1103; GFX8PLUS-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe 1104; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 1105; GFX8PLUS-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1106; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v4 1107; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 1108; GFX8PLUS-NEXT: ; return to shader part epilog 1109; 1110; GFX11-LABEL: buffer_load_v2i32_tfe: 1111; GFX11: ; %bb.0: 1112; GFX11-NEXT: v_mov_b32_e32 v2, 0 1113; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1114; GFX11-NEXT: v_mov_b32_e32 v3, v2 1115; GFX11-NEXT: v_mov_b32_e32 v4, v2 1116; GFX11-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe 1117; GFX11-NEXT: s_waitcnt vmcnt(0) 1118; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off 1119; GFX11-NEXT: v_mov_b32_e32 v0, v4 1120; GFX11-NEXT: ; return to shader part epilog 1121; 1122; NOPRT-LABEL: buffer_load_v2i32_tfe: 1123; NOPRT: ; %bb.0: 1124; NOPRT-NEXT: v_mov_b32_e32 v4, 0 1125; NOPRT-NEXT: buffer_load_format_xy v[2:4], v4, s[0:3], 0 idxen tfe 1126; NOPRT-NEXT: s_waitcnt vmcnt(0) 1127; NOPRT-NEXT: global_store_b64 v[0:1], v[2:3], off 1128; NOPRT-NEXT: v_mov_b32_e32 v0, v4 1129; NOPRT-NEXT: ; return to shader part epilog 1130; 1131; GFX12-LABEL: buffer_load_v2i32_tfe: 1132; GFX12: ; %bb.0: 1133; GFX12-NEXT: v_mov_b32_e32 v2, 0 1134; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1135; GFX12-NEXT: v_dual_mov_b32 v3, v2 :: 
v_dual_mov_b32 v4, v2 1136; GFX12-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], null idxen tfe 1137; GFX12-NEXT: s_wait_loadcnt 0x0 1138; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off 1139; GFX12-NEXT: v_mov_b32_e32 v0, v4 1140; GFX12-NEXT: ; return to shader part epilog 1141 %load = call { <2 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) 1142 %data = extractvalue { <2 x i32>, i32 } %load, 0 1143 store <2 x i32> %data, ptr addrspace(1) %out 1144 %status = extractvalue { <2 x i32>, i32 } %load, 1 1145 %fstatus = bitcast i32 %status to float 1146 ret float %fstatus 1147} 1148 1149define amdgpu_cs float @buffer_load_v2f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) { 1150; GFX6-LABEL: buffer_load_v2f32_tfe: 1151; GFX6: ; %bb.0: 1152; GFX6-NEXT: v_mov_b32_e32 v2, 0 1153; GFX6-NEXT: v_mov_b32_e32 v3, v2 1154; GFX6-NEXT: v_mov_b32_e32 v4, v2 1155; GFX6-NEXT: v_mov_b32_e32 v5, v2 1156; GFX6-NEXT: buffer_load_format_xyz v[2:5], v2, s[0:3], 0 idxen tfe 1157; GFX6-NEXT: s_mov_b32 s2, 0 1158; GFX6-NEXT: s_mov_b32 s3, 0xf000 1159; GFX6-NEXT: s_mov_b32 s0, s2 1160; GFX6-NEXT: s_mov_b32 s1, s2 1161; GFX6-NEXT: s_waitcnt vmcnt(0) 1162; GFX6-NEXT: buffer_store_dwordx2 v[2:3], v[0:1], s[0:3], 0 addr64 1163; GFX6-NEXT: v_mov_b32_e32 v0, v4 1164; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1165; GFX6-NEXT: ; return to shader part epilog 1166; 1167; GFX8PLUS-LABEL: buffer_load_v2f32_tfe: 1168; GFX8PLUS: ; %bb.0: 1169; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0 1170; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2 1171; GFX8PLUS-NEXT: v_mov_b32_e32 v4, v2 1172; GFX8PLUS-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe 1173; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 1174; GFX8PLUS-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1175; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v4 1176; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 1177; GFX8PLUS-NEXT: ; return to shader part epilog 1178; 1179; GFX11-LABEL: buffer_load_v2f32_tfe: 1180; GFX11: ; %bb.0: 1181; 
GFX11-NEXT: v_mov_b32_e32 v2, 0 1182; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1183; GFX11-NEXT: v_mov_b32_e32 v3, v2 1184; GFX11-NEXT: v_mov_b32_e32 v4, v2 1185; GFX11-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], 0 idxen tfe 1186; GFX11-NEXT: s_waitcnt vmcnt(0) 1187; GFX11-NEXT: global_store_b64 v[0:1], v[2:3], off 1188; GFX11-NEXT: v_mov_b32_e32 v0, v4 1189; GFX11-NEXT: ; return to shader part epilog 1190; 1191; NOPRT-LABEL: buffer_load_v2f32_tfe: 1192; NOPRT: ; %bb.0: 1193; NOPRT-NEXT: v_mov_b32_e32 v4, 0 1194; NOPRT-NEXT: buffer_load_format_xy v[2:4], v4, s[0:3], 0 idxen tfe 1195; NOPRT-NEXT: s_waitcnt vmcnt(0) 1196; NOPRT-NEXT: global_store_b64 v[0:1], v[2:3], off 1197; NOPRT-NEXT: v_mov_b32_e32 v0, v4 1198; NOPRT-NEXT: ; return to shader part epilog 1199; 1200; GFX12-LABEL: buffer_load_v2f32_tfe: 1201; GFX12: ; %bb.0: 1202; GFX12-NEXT: v_mov_b32_e32 v2, 0 1203; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1204; GFX12-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v4, v2 1205; GFX12-NEXT: buffer_load_format_xy v[2:4], v2, s[0:3], null idxen tfe 1206; GFX12-NEXT: s_wait_loadcnt 0x0 1207; GFX12-NEXT: global_store_b64 v[0:1], v[2:3], off 1208; GFX12-NEXT: v_mov_b32_e32 v0, v4 1209; GFX12-NEXT: ; return to shader part epilog 1210 %load = call { <2 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) 1211 %data = extractvalue { <2 x float>, i32 } %load, 0 1212 store <2 x float> %data, ptr addrspace(1) %out 1213 %status = extractvalue { <2 x float>, i32 } %load, 1 1214 %fstatus = bitcast i32 %status to float 1215 ret float %fstatus 1216} 1217 1218define amdgpu_cs float @buffer_load_i32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) { 1219; GFX6-LABEL: buffer_load_i32_tfe: 1220; GFX6: ; %bb.0: 1221; GFX6-NEXT: v_mov_b32_e32 v2, 0 1222; GFX6-NEXT: v_mov_b32_e32 v3, v2 1223; GFX6-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe 1224; GFX6-NEXT: s_mov_b32 s2, 0 1225; GFX6-NEXT: s_mov_b32 s3, 
0xf000 1226; GFX6-NEXT: s_mov_b32 s0, s2 1227; GFX6-NEXT: s_mov_b32 s1, s2 1228; GFX6-NEXT: s_waitcnt vmcnt(0) 1229; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 1230; GFX6-NEXT: v_mov_b32_e32 v0, v3 1231; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1232; GFX6-NEXT: ; return to shader part epilog 1233; 1234; GFX8PLUS-LABEL: buffer_load_i32_tfe: 1235; GFX8PLUS: ; %bb.0: 1236; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0 1237; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2 1238; GFX8PLUS-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe 1239; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 1240; GFX8PLUS-NEXT: flat_store_dword v[0:1], v2 1241; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v3 1242; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 1243; GFX8PLUS-NEXT: ; return to shader part epilog 1244; 1245; GFX11-LABEL: buffer_load_i32_tfe: 1246; GFX11: ; %bb.0: 1247; GFX11-NEXT: v_mov_b32_e32 v2, 0 1248; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1249; GFX11-NEXT: v_mov_b32_e32 v3, v2 1250; GFX11-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe 1251; GFX11-NEXT: s_waitcnt vmcnt(0) 1252; GFX11-NEXT: global_store_b32 v[0:1], v2, off 1253; GFX11-NEXT: v_mov_b32_e32 v0, v3 1254; GFX11-NEXT: ; return to shader part epilog 1255; 1256; NOPRT-LABEL: buffer_load_i32_tfe: 1257; NOPRT: ; %bb.0: 1258; NOPRT-NEXT: v_mov_b32_e32 v3, 0 1259; NOPRT-NEXT: buffer_load_format_x v[2:3], v3, s[0:3], 0 idxen tfe 1260; NOPRT-NEXT: s_waitcnt vmcnt(0) 1261; NOPRT-NEXT: global_store_b32 v[0:1], v2, off 1262; NOPRT-NEXT: v_mov_b32_e32 v0, v3 1263; NOPRT-NEXT: ; return to shader part epilog 1264; 1265; GFX12-LABEL: buffer_load_i32_tfe: 1266; GFX12: ; %bb.0: 1267; GFX12-NEXT: v_mov_b32_e32 v2, 0 1268; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1269; GFX12-NEXT: v_mov_b32_e32 v3, v2 1270; GFX12-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], null idxen tfe 1271; GFX12-NEXT: s_wait_loadcnt 0x0 1272; GFX12-NEXT: global_store_b32 v[0:1], v2, off 1273; GFX12-NEXT: v_mov_b32_e32 v0, v3 1274; GFX12-NEXT: ; return to shader part 
epilog 1275 %load = call { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) 1276 %data = extractvalue { i32, i32 } %load, 0 1277 store i32 %data, ptr addrspace(1) %out 1278 %status = extractvalue { i32, i32 } %load, 1 1279 %fstatus = bitcast i32 %status to float 1280 ret float %fstatus 1281} 1282 1283define amdgpu_cs float @buffer_load_f32_tfe(<4 x i32> inreg %rsrc, ptr addrspace(1) %out) { 1284; GFX6-LABEL: buffer_load_f32_tfe: 1285; GFX6: ; %bb.0: 1286; GFX6-NEXT: v_mov_b32_e32 v2, 0 1287; GFX6-NEXT: v_mov_b32_e32 v3, v2 1288; GFX6-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe 1289; GFX6-NEXT: s_mov_b32 s2, 0 1290; GFX6-NEXT: s_mov_b32 s3, 0xf000 1291; GFX6-NEXT: s_mov_b32 s0, s2 1292; GFX6-NEXT: s_mov_b32 s1, s2 1293; GFX6-NEXT: s_waitcnt vmcnt(0) 1294; GFX6-NEXT: buffer_store_dword v2, v[0:1], s[0:3], 0 addr64 1295; GFX6-NEXT: v_mov_b32_e32 v0, v3 1296; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1297; GFX6-NEXT: ; return to shader part epilog 1298; 1299; GFX8PLUS-LABEL: buffer_load_f32_tfe: 1300; GFX8PLUS: ; %bb.0: 1301; GFX8PLUS-NEXT: v_mov_b32_e32 v2, 0 1302; GFX8PLUS-NEXT: v_mov_b32_e32 v3, v2 1303; GFX8PLUS-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe 1304; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 1305; GFX8PLUS-NEXT: flat_store_dword v[0:1], v2 1306; GFX8PLUS-NEXT: v_mov_b32_e32 v0, v3 1307; GFX8PLUS-NEXT: s_waitcnt vmcnt(0) 1308; GFX8PLUS-NEXT: ; return to shader part epilog 1309; 1310; GFX11-LABEL: buffer_load_f32_tfe: 1311; GFX11: ; %bb.0: 1312; GFX11-NEXT: v_mov_b32_e32 v2, 0 1313; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1314; GFX11-NEXT: v_mov_b32_e32 v3, v2 1315; GFX11-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], 0 idxen tfe 1316; GFX11-NEXT: s_waitcnt vmcnt(0) 1317; GFX11-NEXT: global_store_b32 v[0:1], v2, off 1318; GFX11-NEXT: v_mov_b32_e32 v0, v3 1319; GFX11-NEXT: ; return to shader part epilog 1320; 1321; NOPRT-LABEL: buffer_load_f32_tfe: 1322; NOPRT: ; %bb.0: 1323; 
NOPRT-NEXT: v_mov_b32_e32 v3, 0 1324; NOPRT-NEXT: buffer_load_format_x v[2:3], v3, s[0:3], 0 idxen tfe 1325; NOPRT-NEXT: s_waitcnt vmcnt(0) 1326; NOPRT-NEXT: global_store_b32 v[0:1], v2, off 1327; NOPRT-NEXT: v_mov_b32_e32 v0, v3 1328; NOPRT-NEXT: ; return to shader part epilog 1329; 1330; GFX12-LABEL: buffer_load_f32_tfe: 1331; GFX12: ; %bb.0: 1332; GFX12-NEXT: v_mov_b32_e32 v2, 0 1333; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1334; GFX12-NEXT: v_mov_b32_e32 v3, v2 1335; GFX12-NEXT: buffer_load_format_x v[2:3], v2, s[0:3], null idxen tfe 1336; GFX12-NEXT: s_wait_loadcnt 0x0 1337; GFX12-NEXT: global_store_b32 v[0:1], v2, off 1338; GFX12-NEXT: v_mov_b32_e32 v0, v3 1339; GFX12-NEXT: ; return to shader part epilog 1340 %load = call { float, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_f32i32s(<4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0) 1341 %data = extractvalue { float, i32 } %load, 0 1342 store float %data, ptr addrspace(1) %out 1343 %status = extractvalue { float, i32 } %load, 1 1344 %fstatus = bitcast i32 %status to float 1345 ret float %fstatus 1346} 1347 1348declare float @llvm.amdgcn.struct.buffer.load.format.f32(<4 x i32>, i32, i32, i32, i32) #0 1349declare <2 x float> @llvm.amdgcn.struct.buffer.load.format.v2f32(<4 x i32>, i32, i32, i32, i32) #0 1350declare <4 x float> @llvm.amdgcn.struct.buffer.load.format.v4f32(<4 x i32>, i32, i32, i32, i32) #0 1351declare i32 @llvm.amdgcn.struct.buffer.load.format.i32(<4 x i32>, i32, i32, i32, i32) #0 1352declare { <4 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0 1353declare { <4 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v4f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0 1354declare { <3 x i32>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0 1355declare { <3 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v3f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0 1356declare { <2 x i32>, 
i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0 1357declare { <2 x float>, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_v2f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0 1358declare { i32, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_i32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0 1359declare { float, i32 } @llvm.amdgcn.struct.buffer.load.format.sl_f32i32s(<4 x i32>, i32, i32, i32, i32 immarg) #0 1360attributes #0 = { nounwind readonly } 1361