; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s --check-prefixes=PREGFX10
;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefixes=PREGFX10
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX10
;RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck %s --check-prefixes=GFX11

; Three v4f32 loads whose last intrinsic operand is 0, 1 and 2; the expected
; output carries no modifier, glc and slc respectively.
define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: buffer_load:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
; PREGFX10-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
; PREGFX10-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
; GFX10-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
; GFX10-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 glc
; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 slc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 0)
  %data_glc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 1)
  %data_slc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 2)
  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
  ret {<4 x float>, <4 x float>, <4 x float>} %r2
}

; Same shape as @buffer_load with last operand 4, 5 and 6: the gfx10 and gfx11
; expectations add dlc, the pre-gfx10 expectations show no dlc.
define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load_dlc(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: buffer_load_dlc:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
; PREGFX10-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
; PREGFX10-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_dlc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 dlc
; GFX10-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc dlc
; GFX10-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 slc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_dlc:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 dlc
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 glc dlc
; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 slc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 4)
  %data_glc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 5)
  %data_slc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 6)
  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
  ret {<4 x float>, <4 x float>, <4 x float>} %r2
}

; Last operand has bit 31 set (-2147483648 plus 0, 1 or 2): every expected load
; carries glc, and additionally dlc on gfx10 and gfx11.
define amdgpu_ps {<4 x float>, <4 x float>, <4 x float>} @buffer_load_volatile(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: buffer_load_volatile:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc
; PREGFX10-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc
; PREGFX10-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 glc slc
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_volatile:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 glc dlc
; GFX10-NEXT:    buffer_load_dwordx4 v[4:7], off, s[0:3], 0 glc dlc
; GFX10-NEXT:    buffer_load_dwordx4 v[8:11], off, s[0:3], 0 glc slc dlc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_volatile:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 glc dlc
; GFX11-NEXT:    buffer_load_b128 v[4:7], off, s[0:3], 0 glc dlc
; GFX11-NEXT:    buffer_load_b128 v[8:11], off, s[0:3], 0 glc slc dlc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 -2147483648)
  %data_glc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 -2147483647)
  %data_slc = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 0, i32 0, i32 -2147483646)
  %r0 = insertvalue {<4 x float>, <4 x float>, <4 x float>} undef, <4 x float> %data, 0
  %r1 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r0, <4 x float> %data_glc, 1
  %r2 = insertvalue {<4 x float>, <4 x float>, <4 x float>} %r1, <4 x float> %data_slc, 2
  ret {<4 x float>, <4 x float>, <4 x float>} %r2
}

; Constant 40 in the second intrinsic operand is expected as offset:40.
define amdgpu_ps <4 x float> @buffer_load_immoffs(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: buffer_load_immoffs:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:40
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_immoffs:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:40
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_immoffs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 offset:40
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 40, i32 0, i32 0)
  ret <4 x float> %data
}

; Second operand 4 and third operand 8188 (0x1ffc): the expected output
; materializes 0x1ffc in s4 and keeps 4 as offset:4.
define amdgpu_ps <4 x float> @buffer_load_immoffs_large(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: buffer_load_immoffs_large:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    s_movk_i32 s4, 0x1ffc
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], s4 offset:4
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_immoffs_large:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_movk_i32 s4, 0x1ffc
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], s4 offset:4
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_immoffs_large:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_movk_i32 s4, 0x1ffc
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], s4 offset:4
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 4, i32 8188, i32 0)
  ret <4 x float> %data
}

; A non-constant second operand is expected in v0 with the offen modifier.
define amdgpu_ps <4 x float> @buffer_load_ofs(ptr addrspace(8) inreg, i32) {
; PREGFX10-LABEL: buffer_load_ofs:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_ofs:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_ofs:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %1, i32 0, i32 0)
  ret <4 x float> %data
}

; %1 + 60: the constant addend is expected to fold into offset:60 alongside offen.
define amdgpu_ps <4 x float> @buffer_load_ofs_imm(ptr addrspace(8) inreg, i32) {
; PREGFX10-LABEL: buffer_load_ofs_imm:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:60
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_ofs_imm:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:60
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_ofs_imm:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %ofs = add i32 %1, 60
  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %ofs, i32 0, i32 0)
  ret <4 x float> %data
}

; Constant offset 4092 is expected entirely in the immediate field (offset:4092).
define amdgpu_ps <4 x float> @buffer_load_voffset_large_12bit(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: buffer_load_voffset_large_12bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_voffset_large_12bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_12bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 4092, i32 0, i32 0)
  ret <4 x float> %data
}

; Constant offset 8188 (0x1ffc) is expected split into 0x1000 in a VGPR plus
; offset:4092.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_13bit(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: buffer_load_voffset_large_13bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0x1000
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_voffset_large_13bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x1000
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_13bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0x1000
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 8188, i32 0, i32 0)
  ret <4 x float> %data
}

; Constant offset 65532 (0xfffc) is expected split into 0xf000 in a VGPR plus
; offset:4092.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_16bit(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: buffer_load_voffset_large_16bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0xf000
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_voffset_large_16bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xf000
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_16bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0xf000
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 65532, i32 0, i32 0)
  ret <4 x float> %data
}

; Constant offset 8388604 (0x7ffffc) is expected split into 0x7ff000 in a VGPR
; plus offset:4092.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_23bit(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: buffer_load_voffset_large_23bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0x7ff000
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_voffset_large_23bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0x7ff000
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_23bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0x7ff000
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 8388604, i32 0, i32 0)
  ret <4 x float> %data
}

; Constant offset 16777212 (0xfffffc) is expected split into 0xfff000 in a VGPR
; plus offset:4092.
define amdgpu_ps <4 x float> @buffer_load_voffset_large_24bit(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: buffer_load_voffset_large_24bit:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_mov_b32_e32 v0, 0xfff000
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_voffset_large_24bit:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v0, 0xfff000
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4092
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_voffset_large_24bit:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_mov_b32_e32 v0, 0xfff000
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 16777212, i32 0, i32 0)
  ret <4 x float> %data
}


; Scalar f32 result: a single-dword load is expected.
define amdgpu_ps float @buffer_load_x1(ptr addrspace(8) inreg %rsrc, i32 %ofs) {
; PREGFX10-LABEL: buffer_load_x1:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_x1:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_load_dword v0, v0, s[0:3], 0 offen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_x1:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 0, i32 0)
  ret float %data
}

; v2f32 result: a two-dword load is expected.
define amdgpu_ps <2 x float> @buffer_load_x2(ptr addrspace(8) inreg %rsrc, i32 %ofs) {
; PREGFX10-LABEL: buffer_load_x2:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_x2:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[0:3], 0 offen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_x2:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %ofs, i32 0, i32 0)
  ret <2 x float> %data
}

; %ofs + -16: the expected output keeps a separate add and uses no immediate
; offset on the load.
; NOTE(review): this function has no pre-gfx10 assertions; presumably the verde
; and tonga outputs differ under their shared check prefix - confirm.
define amdgpu_ps <4 x float> @buffer_load_negative_offset(ptr addrspace(8) inreg, i32 %ofs) {
; GFX10-LABEL: buffer_load_negative_offset:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_add_nc_u32_e32 v0, -16, v0
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_negative_offset:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_add_nc_u32_e32 v0, -16, v0
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %ofs.1 = add i32 %ofs, -16
  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %ofs.1, i32 0, i32 0)
  ret <4 x float> %data
}

; Two LDS stores (%lds and %lds + 4 floats) sit on either side of a buffer
; load; the expected output combines them into one two-address DS store.
; NOTE(review): this function has no pre-gfx10 assertions - confirm that is
; intended.
define amdgpu_ps float @buffer_load_mmo(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %lds) {
; GFX10-LABEL: buffer_load_mmo:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GFX10-NEXT:    v_mov_b32_e32 v2, 0
; GFX10-NEXT:    ds_write2_b32 v0, v2, v2 offset1:4
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_mov_b32_e32 v0, v1
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_mmo:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    buffer_load_b32 v1, off, s[0:3], 0
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    ds_store_2addr_b32 v0, v2, v2 offset1:4
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_mov_b32_e32 v0, v1
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
entry:
  store float 0.0, ptr addrspace(3) %lds
  %val = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  %tmp2 = getelementptr float, ptr addrspace(3) %lds, i32 4
  store float 0.0, ptr addrspace(3) %tmp2
  ret float %val
}

; Six f32 loads at %a + 4, 8, 12, 16, 28, 32 (offsets formed with add): the
; expected output is one four-dword load at offset:4 and one two-dword load at
; offset:28.
define amdgpu_ps void @buffer_load_x1_offen_merged_and(ptr addrspace(8) inreg %rsrc, i32 %a) {
; PREGFX10-LABEL: buffer_load_x1_offen_merged_and:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
; PREGFX10-NEXT:    buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
; PREGFX10-NEXT:    s_waitcnt vmcnt(1)
; PREGFX10-NEXT:    exp mrt0 v1, v2, v3, v4 done vm
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    exp mrt0 v5, v6, v0, v0 done vm
; PREGFX10-NEXT:    s_endpgm
;
; GFX10-LABEL: buffer_load_x1_offen_merged_and:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    buffer_load_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
; GFX10-NEXT:    buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
; GFX10-NEXT:    s_waitcnt vmcnt(1)
; GFX10-NEXT:    exp mrt0 v1, v2, v3, v4 done vm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    exp mrt0 v5, v6, v0, v0 done vm
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: buffer_load_x1_offen_merged_and:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b128 v[1:4], v0, s[0:3], 0 offen offset:4
; GFX11-NEXT:    buffer_load_b64 v[5:6], v0, s[0:3], 0 offen offset:28
; GFX11-NEXT:    s_waitcnt vmcnt(1)
; GFX11-NEXT:    exp mrt0 v1, v2, v3, v4 done
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    exp mrt0 v5, v6, v0, v0 done
; GFX11-NEXT:    s_endpgm
main_body:
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 8
  %a3 = add i32 %a, 12
  %a4 = add i32 %a, 16
  %a5 = add i32 %a, 28
  %a6 = add i32 %a, 32
  %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
  %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
  %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 0)
  %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 0)
  %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 0)
  %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 0)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
  ret void
}

; Same six loads, but the offsets are formed with `or` on a base shifted left
; by 6; the expected merged loads match the add variant.
define amdgpu_ps void @buffer_load_x1_offen_merged_or(ptr addrspace(8) inreg %rsrc, i32 %inp) {
; PREGFX10-LABEL: buffer_load_x1_offen_merged_or:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_lshlrev_b32_e32 v4, 6, v0
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v4, s[0:3], 0 offen offset:4
; PREGFX10-NEXT:    buffer_load_dwordx2 v[4:5], v4, s[0:3], 0 offen offset:28
; PREGFX10-NEXT:    s_waitcnt vmcnt(1)
; PREGFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    exp mrt0 v4, v5, v0, v0 done vm
; PREGFX10-NEXT:    s_endpgm
;
; GFX10-LABEL: buffer_load_x1_offen_merged_or:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v6, 6, v0
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v6, s[0:3], 0 offen offset:4
; GFX10-NEXT:    buffer_load_dwordx2 v[4:5], v6, s[0:3], 0 offen offset:28
; GFX10-NEXT:    s_waitcnt vmcnt(1)
; GFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    exp mrt0 v4, v5, v0, v0 done vm
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: buffer_load_x1_offen_merged_or:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_lshlrev_b32_e32 v4, 6, v0
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b128 v[0:3], v4, s[0:3], 0 offen offset:4
; GFX11-NEXT:    buffer_load_b64 v[4:5], v4, s[0:3], 0 offen offset:28
; GFX11-NEXT:    s_waitcnt vmcnt(1)
; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    exp mrt0 v4, v5, v0, v0 done
; GFX11-NEXT:    s_endpgm
main_body:
  %a = shl i32 %inp, 6
  %a1 = or i32 %a, 4
  %a2 = or i32 %a, 8
  %a3 = or i32 %a, 12
  %a4 = or i32 %a, 16
  %a5 = or i32 %a, 28
  %a6 = or i32 %a, 32
  %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
  %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
  %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 0)
  %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 0)
  %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 0)
  %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 0)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
  ret void
}

; Six f32 loads in three adjacent pairs whose last operand is 0, 1 and 3: the
; expected output merges only within each pair (three two-dword loads carrying
; no modifier, glc, and glc slc).
define amdgpu_ps void @buffer_load_x1_offen_merged_glc_slc(ptr addrspace(8) inreg %rsrc, i32 %a) {
; PREGFX10-LABEL: buffer_load_x1_offen_merged_glc_slc:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_dwordx2 v[1:2], v0, s[0:3], 0 offen offset:4
; PREGFX10-NEXT:    buffer_load_dwordx2 v[3:4], v0, s[0:3], 0 offen offset:12 glc
; PREGFX10-NEXT:    buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
; PREGFX10-NEXT:    s_waitcnt vmcnt(1)
; PREGFX10-NEXT:    exp mrt0 v1, v2, v3, v4 done vm
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    exp mrt0 v5, v6, v0, v0 done vm
; PREGFX10-NEXT:    s_endpgm
;
; GFX10-LABEL: buffer_load_x1_offen_merged_glc_slc:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    buffer_load_dwordx2 v[1:2], v0, s[0:3], 0 offen offset:4
; GFX10-NEXT:    buffer_load_dwordx2 v[3:4], v0, s[0:3], 0 offen offset:12 glc
; GFX10-NEXT:    buffer_load_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
; GFX10-NEXT:    s_waitcnt vmcnt(1)
; GFX10-NEXT:    exp mrt0 v1, v2, v3, v4 done vm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    exp mrt0 v5, v6, v0, v0 done vm
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: buffer_load_x1_offen_merged_glc_slc:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    buffer_load_b64 v[1:2], v0, s[0:3], 0 offen offset:4
; GFX11-NEXT:    buffer_load_b64 v[3:4], v0, s[0:3], 0 offen offset:12 glc
; GFX11-NEXT:    buffer_load_b64 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
; GFX11-NEXT:    s_waitcnt vmcnt(1)
; GFX11-NEXT:    exp mrt0 v1, v2, v3, v4 done
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    exp mrt0 v5, v6, v0, v0 done
; GFX11-NEXT:    s_endpgm
main_body:
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 8
  %a3 = add i32 %a, 12
  %a4 = add i32 %a, 16
  %a5 = add i32 %a, 28
  %a6 = add i32 %a, 32
  %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
  %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
  %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 1)
  %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 1)
  %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 3)
  %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 3)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
  ret void
}

; Two v2f32 loads at %a + 4 and %a + 12: the expected output is a single
; four-dword load at offset:4.
define amdgpu_ps void @buffer_load_x2_offen_merged_and(ptr addrspace(8) inreg %rsrc, i32 %a) {
; PREGFX10-LABEL: buffer_load_x2_offen_merged_and:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
; PREGFX10-NEXT:    s_endpgm
;
; GFX10-LABEL: buffer_load_x2_offen_merged_and:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: buffer_load_x2_offen_merged_and:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
; GFX11-NEXT:    s_endpgm
main_body:
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 12
  %vr1 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
  %vr2 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
  %r1 = extractelement <2 x float> %vr1, i32 0
  %r2 = extractelement <2 x float> %vr1, i32 1
  %r3 = extractelement <2 x float> %vr2, i32 0
  %r4 = extractelement <2 x float> %vr2, i32 1
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
  ret void
}

; Two v2f32 loads at offsets 4 and 12 from a base shifted left by 4; a single
; four-dword load at offset:4 is expected.
; NOTE(review): despite the "_or" suffix, the offsets below are formed with
; `add`, unlike @buffer_load_x1_offen_merged_or which uses `or`. Confirm
; whether `or` was intended before regenerating the assertions.
define amdgpu_ps void @buffer_load_x2_offen_merged_or(ptr addrspace(8) inreg %rsrc, i32 %inp) {
; PREGFX10-LABEL: buffer_load_x2_offen_merged_or:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
; PREGFX10-NEXT:    s_endpgm
;
; GFX10-LABEL: buffer_load_x2_offen_merged_or:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[0:3], 0 offen offset:4
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: buffer_load_x2_offen_merged_or:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
; GFX11-NEXT:    s_endpgm
main_body:
  %a = shl i32 %inp, 4
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 12
  %vr1 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
  %vr2 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
  %r1 = extractelement <2 x float> %vr1, i32 0
  %r2 = extractelement <2 x float> %vr1, i32 1
  %r3 = extractelement <2 x float> %vr2, i32 0
  %r4 = extractelement <2 x float> %vr2, i32 1
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
  ret void
}

; Six f32 loads at constant offsets 4, 8, 12, 16, 28, 32: expected as one
; four-dword load at offset:4 and one two-dword load at offset:28, with no
; VGPR offset.
define amdgpu_ps void @buffer_load_x1_offset_merged(ptr addrspace(8) inreg %rsrc) {
; PREGFX10-LABEL: buffer_load_x1_offset_merged:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
; PREGFX10-NEXT:    buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
; PREGFX10-NEXT:    s_waitcnt vmcnt(1)
; PREGFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    exp mrt0 v4, v5, v0, v0 done vm
; PREGFX10-NEXT:    s_endpgm
;
; GFX10-LABEL: buffer_load_x1_offset_merged:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x1
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
; GFX10-NEXT:    buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28
; GFX10-NEXT:    s_waitcnt vmcnt(1)
; GFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    exp mrt0 v4, v5, v0, v0 done vm
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: buffer_load_x1_offset_merged:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4
; GFX11-NEXT:    buffer_load_b64 v[4:5], off, s[0:3], 0 offset:28
; GFX11-NEXT:    s_waitcnt vmcnt(1)
; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    exp mrt0 v4, v5, v0, v0 done
; GFX11-NEXT:    s_endpgm
main_body:
  %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
  %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 8, i32 0, i32 0)
  %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
  %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 16, i32 0, i32 0)
  %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 28, i32 0, i32 0)
  %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 32, i32 0, i32 0)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
  ret void
}

; Two v2f32 loads at constant offsets 4 and 12: expected as a single four-dword
; load at offset:4.
define amdgpu_ps void @buffer_load_x2_offset_merged(ptr addrspace(8) inreg %rsrc) {
; PREGFX10-LABEL: buffer_load_x2_offset_merged:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
; PREGFX10-NEXT:    s_endpgm
;
; GFX10-LABEL: buffer_load_x2_offset_merged:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    exp mrt0 v0, v1, v2, v3 done vm
; GFX10-NEXT:    s_endpgm
;
; GFX11-LABEL: buffer_load_x2_offset_merged:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    exp mrt0 v0, v1, v2, v3 done
; GFX11-NEXT:    s_endpgm
main_body:
  %vr1 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
  %vr2 = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
  %r1 = extractelement <2 x float> %vr1, i32 0
  %r2 = extractelement <2 x float> %vr1, i32 1
  %r3 = extractelement <2 x float> %vr2, i32 0
  %r4 = extractelement <2 x float> %vr2, i32 1
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
  ret void
}

; Integer-typed variants (v4i32, v2i32, i32) with last operand 0, 1 and 2; the
; results are bitcast to float types for the return value.
define amdgpu_ps {<4 x float>, <2 x float>, float} @buffer_load_int(ptr addrspace(8) inreg) {
; PREGFX10-LABEL: buffer_load_int:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
; PREGFX10-NEXT:    buffer_load_dwordx2 v[4:5], off, s[0:3], 0 glc
; PREGFX10-NEXT:    buffer_load_dword v6, off, s[0:3], 0 slc
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: buffer_load_int:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    s_clause 0x2
; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], off, s[0:3], 0
; GFX10-NEXT:    buffer_load_dwordx2 v[4:5], off, s[0:3], 0 glc
; GFX10-NEXT:    buffer_load_dword v6, off, s[0:3], 0 slc
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: buffer_load_int:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    buffer_load_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT:    buffer_load_b64 v[4:5], off, s[0:3], 0 glc
; GFX11-NEXT:    buffer_load_b32 v6, off, s[0:3], 0 slc
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %data = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) %0, i32 0, i32 0, i32 0)
  %data_glc = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) %0, i32 0, i32 0, i32 1)
  %data_slc = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) %0, i32 0, i32 0, i32 2)
  %fdata = bitcast <4 x i32> %data to <4 x float>
  %fdata_glc = bitcast <2 x i32> %data_glc to <2 x float>
  %fdata_slc = bitcast i32 %data_slc to float
  %r0 = insertvalue {<4 x float>, <2 x float>, float} undef, <4 x float> %fdata, 0
  %r1 = insertvalue {<4 x float>, <2 x float>, float} %r0, <2 x float> %fdata_glc, 1
  %r2 = insertvalue {<4 x float>, <2 x float>, float} %r1, float %fdata_slc, 2
  ret {<4 x float>, <2 x float>, float} %r2
}

; i8 load that is zero-extended and converted with uitofp: an unsigned byte
; load followed by v_cvt_f32_ubyte0 is expected.
define amdgpu_ps float @raw_ptr_buffer_load_ubyte(ptr addrspace(8) inreg %rsrc) {
; PREGFX10-LABEL: raw_ptr_buffer_load_ubyte:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: raw_ptr_buffer_load_ubyte:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_load_ubyte v0, off, s[0:3], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: raw_ptr_buffer_load_ubyte:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_u8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_cvt_f32_ubyte0_e32 v0, v0
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %tmp = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  %tmp2 = zext i8 %tmp to i32
  %val = uitofp i32 %tmp2 to float
  ret float %val
}

; i16 load that is zero-extended and converted with uitofp: an unsigned short
; load followed by v_cvt_f32_u32 is expected.
define amdgpu_ps float @raw_ptr_buffer_load_i16(ptr addrspace(8) inreg %rsrc) {
; PREGFX10-LABEL: raw_ptr_buffer_load_i16:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    v_cvt_f32_u32_e32 v0, v0
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: raw_ptr_buffer_load_i16:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cvt_f32_u32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: raw_ptr_buffer_load_i16:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_cvt_f32_u32_e32 v0, v0
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %tmp = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  %tmp2 = zext i16 %tmp to i32
  %val = uitofp i32 %tmp2 to float
  ret float %val
}

; i8 load that is sign-extended and converted with sitofp: a signed byte load
; followed by v_cvt_f32_i32 is expected.
define amdgpu_ps float @raw_ptr_buffer_load_sbyte(ptr addrspace(8) inreg %rsrc) {
; PREGFX10-LABEL: raw_ptr_buffer_load_sbyte:
; PREGFX10:       ; %bb.0: ; %main_body
; PREGFX10-NEXT:    buffer_load_sbyte v0, off, s[0:3], 0
; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
; PREGFX10-NEXT:    v_cvt_f32_i32_e32 v0, v0
; PREGFX10-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: raw_ptr_buffer_load_sbyte:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    buffer_load_sbyte v0, off, s[0:3], 0
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    v_cvt_f32_i32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: raw_ptr_buffer_load_sbyte:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    buffer_load_i8 v0, off, s[0:3], 0
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    v_cvt_f32_i32_e32 v0, v0
; GFX11-NEXT:    ; return to shader part epilog
main_body:
  %tmp = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  %tmp2 = sext i8 %tmp to i32
  %val = sitofp i32 %tmp2 to float
  ret float %val
}

define amdgpu_ps float
@raw_ptr_buffer_load_sshort(ptr addrspace(8) inreg %rsrc) { 862; PREGFX10-LABEL: raw_ptr_buffer_load_sshort: 863; PREGFX10: ; %bb.0: ; %main_body 864; PREGFX10-NEXT: buffer_load_sshort v0, off, s[0:3], 0 865; PREGFX10-NEXT: s_waitcnt vmcnt(0) 866; PREGFX10-NEXT: v_cvt_f32_i32_e32 v0, v0 867; PREGFX10-NEXT: ; return to shader part epilog 868; 869; GFX10-LABEL: raw_ptr_buffer_load_sshort: 870; GFX10: ; %bb.0: ; %main_body 871; GFX10-NEXT: buffer_load_sshort v0, off, s[0:3], 0 872; GFX10-NEXT: s_waitcnt vmcnt(0) 873; GFX10-NEXT: v_cvt_f32_i32_e32 v0, v0 874; GFX10-NEXT: ; return to shader part epilog 875; 876; GFX11-LABEL: raw_ptr_buffer_load_sshort: 877; GFX11: ; %bb.0: ; %main_body 878; GFX11-NEXT: buffer_load_i16 v0, off, s[0:3], 0 879; GFX11-NEXT: s_waitcnt vmcnt(0) 880; GFX11-NEXT: v_cvt_f32_i32_e32 v0, v0 881; GFX11-NEXT: ; return to shader part epilog 882main_body: 883 %tmp = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0) 884 %tmp2 = sext i16 %tmp to i32 885 %val = sitofp i32 %tmp2 to float 886 ret float %val 887} 888 889define amdgpu_ps void @raw_ptr_buffer_load_f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) { 890; PREGFX10-LABEL: raw_ptr_buffer_load_f16: 891; PREGFX10: ; %bb.0: ; %main_body 892; PREGFX10-NEXT: buffer_load_ushort v1, off, s[0:3], 0 893; PREGFX10-NEXT: s_mov_b32 m0, -1 894; PREGFX10-NEXT: s_waitcnt vmcnt(0) 895; PREGFX10-NEXT: ds_write_b16 v0, v1 896; PREGFX10-NEXT: s_endpgm 897; 898; GFX10-LABEL: raw_ptr_buffer_load_f16: 899; GFX10: ; %bb.0: ; %main_body 900; GFX10-NEXT: buffer_load_ushort v1, off, s[0:3], 0 901; GFX10-NEXT: s_waitcnt vmcnt(0) 902; GFX10-NEXT: ds_write_b16 v0, v1 903; GFX10-NEXT: s_endpgm 904; 905; GFX11-LABEL: raw_ptr_buffer_load_f16: 906; GFX11: ; %bb.0: ; %main_body 907; GFX11-NEXT: buffer_load_u16 v1, off, s[0:3], 0 908; GFX11-NEXT: s_waitcnt vmcnt(0) 909; GFX11-NEXT: ds_store_b16 v0, v1 910; GFX11-NEXT: s_endpgm 911main_body: 912 %val = call half 
@llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0) 913 store half %val, ptr addrspace(3) %ptr 914 ret void 915} 916 917define amdgpu_ps void @raw_ptr_buffer_load_v2f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) { 918; PREGFX10-LABEL: raw_ptr_buffer_load_v2f16: 919; PREGFX10: ; %bb.0: ; %main_body 920; PREGFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0 921; PREGFX10-NEXT: s_mov_b32 m0, -1 922; PREGFX10-NEXT: s_waitcnt vmcnt(0) 923; PREGFX10-NEXT: ds_write_b32 v0, v1 924; PREGFX10-NEXT: s_endpgm 925; 926; GFX10-LABEL: raw_ptr_buffer_load_v2f16: 927; GFX10: ; %bb.0: ; %main_body 928; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0 929; GFX10-NEXT: s_waitcnt vmcnt(0) 930; GFX10-NEXT: ds_write_b32 v0, v1 931; GFX10-NEXT: s_endpgm 932; 933; GFX11-LABEL: raw_ptr_buffer_load_v2f16: 934; GFX11: ; %bb.0: ; %main_body 935; GFX11-NEXT: buffer_load_b32 v1, off, s[0:3], 0 936; GFX11-NEXT: s_waitcnt vmcnt(0) 937; GFX11-NEXT: ds_store_b32 v0, v1 938; GFX11-NEXT: s_endpgm 939main_body: 940 %val = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0) 941 store <2 x half> %val, ptr addrspace(3) %ptr 942 ret void 943} 944 945define amdgpu_ps void @raw_ptr_buffer_load_v4f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) { 946; PREGFX10-LABEL: raw_ptr_buffer_load_v4f16: 947; PREGFX10: ; %bb.0: 948; PREGFX10-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0 949; PREGFX10-NEXT: s_mov_b32 m0, -1 950; PREGFX10-NEXT: s_waitcnt vmcnt(0) 951; PREGFX10-NEXT: ds_write_b64 v0, v[1:2] 952; PREGFX10-NEXT: s_endpgm 953; 954; GFX10-LABEL: raw_ptr_buffer_load_v4f16: 955; GFX10: ; %bb.0: 956; GFX10-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0 957; GFX10-NEXT: s_waitcnt vmcnt(0) 958; GFX10-NEXT: ds_write_b64 v0, v[1:2] 959; GFX10-NEXT: s_endpgm 960; 961; GFX11-LABEL: raw_ptr_buffer_load_v4f16: 962; GFX11: ; %bb.0: 963; GFX11-NEXT: buffer_load_b64 v[1:2], off, s[0:3], 0 964; GFX11-NEXT: s_waitcnt vmcnt(0) 
965; GFX11-NEXT: ds_store_b64 v0, v[1:2] 966; GFX11-NEXT: s_endpgm 967 %val = call <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0) 968 store <4 x half> %val, ptr addrspace(3) %ptr 969 ret void 970} 971 972; FIXME 973; define amdgpu_ps void @raw_ptr_buffer_load_v6f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) { 974; %val = call <6 x half> @llvm.amdgcn.raw.ptr.buffer.load.v6f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0) 975; store <6 x half> %val, ptr addrspace(3) %ptr 976; ret void 977; } 978 979define amdgpu_ps void @raw_ptr_buffer_load_v8f16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) { 980; GFX10-LABEL: raw_ptr_buffer_load_v8f16: 981; GFX10: ; %bb.0: 982; GFX10-NEXT: buffer_load_dwordx4 v[1:4], off, s[0:3], 0 983; GFX10-NEXT: s_waitcnt vmcnt(0) 984; GFX10-NEXT: ds_write_b128 v0, v[1:4] 985; GFX10-NEXT: s_endpgm 986; 987; GFX11-LABEL: raw_ptr_buffer_load_v8f16: 988; GFX11: ; %bb.0: 989; GFX11-NEXT: buffer_load_b128 v[1:4], off, s[0:3], 0 990; GFX11-NEXT: s_waitcnt vmcnt(0) 991; GFX11-NEXT: ds_store_b128 v0, v[1:4] 992; GFX11-NEXT: s_endpgm 993 %val = call <8 x half> @llvm.amdgcn.raw.ptr.buffer.load.v8f16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0) 994 store <8 x half> %val, ptr addrspace(3) %ptr 995 ret void 996} 997 998define amdgpu_ps void @raw_ptr_buffer_load_v2i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) { 999; PREGFX10-LABEL: raw_ptr_buffer_load_v2i16: 1000; PREGFX10: ; %bb.0: ; %main_body 1001; PREGFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0 1002; PREGFX10-NEXT: s_mov_b32 m0, -1 1003; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1004; PREGFX10-NEXT: ds_write_b32 v0, v1 1005; PREGFX10-NEXT: s_endpgm 1006; 1007; GFX10-LABEL: raw_ptr_buffer_load_v2i16: 1008; GFX10: ; %bb.0: ; %main_body 1009; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0 1010; GFX10-NEXT: s_waitcnt vmcnt(0) 1011; GFX10-NEXT: ds_write_b32 v0, v1 1012; GFX10-NEXT: s_endpgm 1013; 1014; GFX11-LABEL: 
raw_ptr_buffer_load_v2i16: 1015; GFX11: ; %bb.0: ; %main_body 1016; GFX11-NEXT: buffer_load_b32 v1, off, s[0:3], 0 1017; GFX11-NEXT: s_waitcnt vmcnt(0) 1018; GFX11-NEXT: ds_store_b32 v0, v1 1019; GFX11-NEXT: s_endpgm 1020main_body: 1021 %val = call <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0) 1022 store <2 x i16> %val, ptr addrspace(3) %ptr 1023 ret void 1024} 1025 1026define amdgpu_ps void @raw_ptr_buffer_load_v4i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) { 1027; PREGFX10-LABEL: raw_ptr_buffer_load_v4i16: 1028; PREGFX10: ; %bb.0: 1029; PREGFX10-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0 1030; PREGFX10-NEXT: s_mov_b32 m0, -1 1031; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1032; PREGFX10-NEXT: ds_write_b64 v0, v[1:2] 1033; PREGFX10-NEXT: s_endpgm 1034; 1035; GFX10-LABEL: raw_ptr_buffer_load_v4i16: 1036; GFX10: ; %bb.0: 1037; GFX10-NEXT: buffer_load_dwordx2 v[1:2], off, s[0:3], 0 1038; GFX10-NEXT: s_waitcnt vmcnt(0) 1039; GFX10-NEXT: ds_write_b64 v0, v[1:2] 1040; GFX10-NEXT: s_endpgm 1041; 1042; GFX11-LABEL: raw_ptr_buffer_load_v4i16: 1043; GFX11: ; %bb.0: 1044; GFX11-NEXT: buffer_load_b64 v[1:2], off, s[0:3], 0 1045; GFX11-NEXT: s_waitcnt vmcnt(0) 1046; GFX11-NEXT: ds_store_b64 v0, v[1:2] 1047; GFX11-NEXT: s_endpgm 1048 %val = call <4 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v4i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0) 1049 store <4 x i16> %val, ptr addrspace(3) %ptr 1050 ret void 1051} 1052 1053; FIXME 1054; define amdgpu_ps void @raw_ptr_buffer_load_v6i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) { 1055; %val = call <6 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v6i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0) 1056; store <6 x i16> %val, ptr addrspace(3) %ptr 1057; ret void 1058; } 1059 1060define amdgpu_ps void @raw_ptr_buffer_load_v8i16(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %ptr) { 1061; GFX10-LABEL: raw_ptr_buffer_load_v8i16: 1062; GFX10: ; %bb.0: 1063; GFX10-NEXT: 
buffer_load_dwordx4 v[1:4], off, s[0:3], 0 1064; GFX10-NEXT: s_waitcnt vmcnt(0) 1065; GFX10-NEXT: ds_write_b128 v0, v[1:4] 1066; GFX10-NEXT: s_endpgm 1067; 1068; GFX11-LABEL: raw_ptr_buffer_load_v8i16: 1069; GFX11: ; %bb.0: 1070; GFX11-NEXT: buffer_load_b128 v[1:4], off, s[0:3], 0 1071; GFX11-NEXT: s_waitcnt vmcnt(0) 1072; GFX11-NEXT: ds_store_b128 v0, v[1:4] 1073; GFX11-NEXT: s_endpgm 1074 %val = call <8 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v8i16(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0) 1075 store <8 x i16> %val, ptr addrspace(3) %ptr 1076 ret void 1077} 1078 1079define amdgpu_ps void @raw_ptr_buffer_load_x1_offset_merged(ptr addrspace(8) inreg %rsrc) { 1080; PREGFX10-LABEL: raw_ptr_buffer_load_x1_offset_merged: 1081; PREGFX10: ; %bb.0: ; %main_body 1082; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4 1083; PREGFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28 1084; PREGFX10-NEXT: s_waitcnt vmcnt(1) 1085; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm 1086; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1087; PREGFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm 1088; PREGFX10-NEXT: s_endpgm 1089; 1090; GFX10-LABEL: raw_ptr_buffer_load_x1_offset_merged: 1091; GFX10: ; %bb.0: ; %main_body 1092; GFX10-NEXT: s_clause 0x1 1093; GFX10-NEXT: buffer_load_dwordx4 v[0:3], off, s[0:3], 0 offset:4 1094; GFX10-NEXT: buffer_load_dwordx2 v[4:5], off, s[0:3], 0 offset:28 1095; GFX10-NEXT: s_waitcnt vmcnt(1) 1096; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm 1097; GFX10-NEXT: s_waitcnt vmcnt(0) 1098; GFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm 1099; GFX10-NEXT: s_endpgm 1100; 1101; GFX11-LABEL: raw_ptr_buffer_load_x1_offset_merged: 1102; GFX11: ; %bb.0: ; %main_body 1103; GFX11-NEXT: s_clause 0x1 1104; GFX11-NEXT: buffer_load_b128 v[0:3], off, s[0:3], 0 offset:4 1105; GFX11-NEXT: buffer_load_b64 v[4:5], off, s[0:3], 0 offset:28 1106; GFX11-NEXT: s_waitcnt vmcnt(1) 1107; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done 1108; GFX11-NEXT: s_waitcnt vmcnt(0) 1109; 
GFX11-NEXT: exp mrt0 v4, v5, v0, v0 done 1110; GFX11-NEXT: s_endpgm 1111main_body: 1112 %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0) 1113 %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 8, i32 0, i32 0) 1114 %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0) 1115 %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 16, i32 0, i32 0) 1116 %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 28, i32 0, i32 0) 1117 %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 32, i32 0, i32 0) 1118 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true) 1119 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true) 1120 ret void 1121} 1122 1123define amdgpu_ps void @raw_ptr_buffer_load_x1_offset_swizzled_not_merged(ptr addrspace(8) inreg %rsrc) { 1124; PREGFX10-LABEL: raw_ptr_buffer_load_x1_offset_swizzled_not_merged: 1125; PREGFX10: ; %bb.0: ; %main_body 1126; PREGFX10-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 1127; PREGFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:8 1128; PREGFX10-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:12 1129; PREGFX10-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:16 1130; PREGFX10-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:28 1131; PREGFX10-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:32 1132; PREGFX10-NEXT: s_waitcnt vmcnt(2) 1133; PREGFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm 1134; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1135; PREGFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm 1136; PREGFX10-NEXT: s_endpgm 1137; 1138; GFX10-LABEL: raw_ptr_buffer_load_x1_offset_swizzled_not_merged: 1139; GFX10: ; %bb.0: ; %main_body 1140; GFX10-NEXT: s_clause 0x5 1141; GFX10-NEXT: buffer_load_dword v0, off, 
s[0:3], 0 offset:4 1142; GFX10-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:8 1143; GFX10-NEXT: buffer_load_dword v2, off, s[0:3], 0 offset:12 1144; GFX10-NEXT: buffer_load_dword v3, off, s[0:3], 0 offset:16 1145; GFX10-NEXT: buffer_load_dword v4, off, s[0:3], 0 offset:28 1146; GFX10-NEXT: buffer_load_dword v5, off, s[0:3], 0 offset:32 1147; GFX10-NEXT: s_waitcnt vmcnt(2) 1148; GFX10-NEXT: exp mrt0 v0, v1, v2, v3 done vm 1149; GFX10-NEXT: s_waitcnt vmcnt(0) 1150; GFX10-NEXT: exp mrt0 v4, v5, v0, v0 done vm 1151; GFX10-NEXT: s_endpgm 1152; 1153; GFX11-LABEL: raw_ptr_buffer_load_x1_offset_swizzled_not_merged: 1154; GFX11: ; %bb.0: ; %main_body 1155; GFX11-NEXT: s_clause 0x5 1156; GFX11-NEXT: buffer_load_b32 v0, off, s[0:3], 0 offset:4 1157; GFX11-NEXT: buffer_load_b32 v1, off, s[0:3], 0 offset:8 1158; GFX11-NEXT: buffer_load_b32 v2, off, s[0:3], 0 offset:12 1159; GFX11-NEXT: buffer_load_b32 v3, off, s[0:3], 0 offset:16 1160; GFX11-NEXT: buffer_load_b32 v4, off, s[0:3], 0 offset:28 1161; GFX11-NEXT: buffer_load_b32 v5, off, s[0:3], 0 offset:32 1162; GFX11-NEXT: s_waitcnt vmcnt(2) 1163; GFX11-NEXT: exp mrt0 v0, v1, v2, v3 done 1164; GFX11-NEXT: s_waitcnt vmcnt(0) 1165; GFX11-NEXT: exp mrt0 v4, v5, v0, v0 done 1166; GFX11-NEXT: s_endpgm 1167main_body: 1168 %r1 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 8) 1169 %r2 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 8, i32 0, i32 8) 1170 %r3 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 12, i32 0, i32 8) 1171 %r4 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 16, i32 0, i32 8) 1172 %r5 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 28, i32 0, i32 8) 1173 %r6 = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) %rsrc, i32 32, i32 0, i32 8) 1174 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 
true, i1 true) 1175 call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true) 1176 ret void 1177} 1178 1179define double @buffer_load_f64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1180; PREGFX10-LABEL: buffer_load_f64__voffset_add: 1181; PREGFX10: ; %bb.0: 1182; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1183; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1184; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1185; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1186; 1187; GFX10-LABEL: buffer_load_f64__voffset_add: 1188; GFX10: ; %bb.0: 1189; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1190; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1191; GFX10-NEXT: s_waitcnt vmcnt(0) 1192; GFX10-NEXT: s_setpc_b64 s[30:31] 1193; 1194; GFX11-LABEL: buffer_load_f64__voffset_add: 1195; GFX11: ; %bb.0: 1196; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1197; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 1198; GFX11-NEXT: s_waitcnt vmcnt(0) 1199; GFX11-NEXT: s_setpc_b64 s[30:31] 1200 %voffset.add = add i32 %voffset, 60 1201 %data = call double @llvm.amdgcn.raw.ptr.buffer.load.f64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1202 ret double %data 1203} 1204 1205define <2 x double> @buffer_load_v2f64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1206; PREGFX10-LABEL: buffer_load_v2f64__voffset_add: 1207; PREGFX10: ; %bb.0: 1208; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1209; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1210; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1211; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1212; 1213; GFX10-LABEL: buffer_load_v2f64__voffset_add: 1214; GFX10: ; %bb.0: 1215; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1216; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1217; GFX10-NEXT: s_waitcnt vmcnt(0) 1218; GFX10-NEXT: 
s_setpc_b64 s[30:31] 1219; 1220; GFX11-LABEL: buffer_load_v2f64__voffset_add: 1221; GFX11: ; %bb.0: 1222; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1223; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 1224; GFX11-NEXT: s_waitcnt vmcnt(0) 1225; GFX11-NEXT: s_setpc_b64 s[30:31] 1226 %voffset.add = add i32 %voffset, 60 1227 %data = call <2 x double> @llvm.amdgcn.raw.ptr.buffer.load.v2f64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1228 ret <2 x double> %data 1229} 1230 1231define i64 @buffer_load_i64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1232; PREGFX10-LABEL: buffer_load_i64__voffset_add: 1233; PREGFX10: ; %bb.0: 1234; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1235; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1236; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1237; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1238; 1239; GFX10-LABEL: buffer_load_i64__voffset_add: 1240; GFX10: ; %bb.0: 1241; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1242; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1243; GFX10-NEXT: s_waitcnt vmcnt(0) 1244; GFX10-NEXT: s_setpc_b64 s[30:31] 1245; 1246; GFX11-LABEL: buffer_load_i64__voffset_add: 1247; GFX11: ; %bb.0: 1248; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1249; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 1250; GFX11-NEXT: s_waitcnt vmcnt(0) 1251; GFX11-NEXT: s_setpc_b64 s[30:31] 1252 %voffset.add = add i32 %voffset, 60 1253 %data = call i64 @llvm.amdgcn.raw.ptr.buffer.load.i64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1254 ret i64 %data 1255} 1256 1257define <2 x i64> @buffer_load_v2i64__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1258; PREGFX10-LABEL: buffer_load_v2i64__voffset_add: 1259; PREGFX10: ; %bb.0: 1260; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1261; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1262; 
PREGFX10-NEXT: s_waitcnt vmcnt(0) 1263; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1264; 1265; GFX10-LABEL: buffer_load_v2i64__voffset_add: 1266; GFX10: ; %bb.0: 1267; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1268; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1269; GFX10-NEXT: s_waitcnt vmcnt(0) 1270; GFX10-NEXT: s_setpc_b64 s[30:31] 1271; 1272; GFX11-LABEL: buffer_load_v2i64__voffset_add: 1273; GFX11: ; %bb.0: 1274; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1275; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 1276; GFX11-NEXT: s_waitcnt vmcnt(0) 1277; GFX11-NEXT: s_setpc_b64 s[30:31] 1278 %voffset.add = add i32 %voffset, 60 1279 %data = call <2 x i64> @llvm.amdgcn.raw.ptr.buffer.load.v2i64(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1280 ret <2 x i64> %data 1281} 1282 1283define ptr @buffer_load_p0__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1284; PREGFX10-LABEL: buffer_load_p0__voffset_add: 1285; PREGFX10: ; %bb.0: 1286; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1287; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1288; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1289; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1290; 1291; GFX10-LABEL: buffer_load_p0__voffset_add: 1292; GFX10: ; %bb.0: 1293; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1294; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1295; GFX10-NEXT: s_waitcnt vmcnt(0) 1296; GFX10-NEXT: s_setpc_b64 s[30:31] 1297; 1298; GFX11-LABEL: buffer_load_p0__voffset_add: 1299; GFX11: ; %bb.0: 1300; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1301; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 1302; GFX11-NEXT: s_waitcnt vmcnt(0) 1303; GFX11-NEXT: s_setpc_b64 s[30:31] 1304 %voffset.add = add i32 %voffset, 60 1305 %data = call ptr @llvm.amdgcn.raw.ptr.buffer.load.p0(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1306 ret ptr %data 1307} 1308 
1309define <2 x ptr> @buffer_load_v2p0__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1310; PREGFX10-LABEL: buffer_load_v2p0__voffset_add: 1311; PREGFX10: ; %bb.0: 1312; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1313; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1314; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1315; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1316; 1317; GFX10-LABEL: buffer_load_v2p0__voffset_add: 1318; GFX10: ; %bb.0: 1319; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1320; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1321; GFX10-NEXT: s_waitcnt vmcnt(0) 1322; GFX10-NEXT: s_setpc_b64 s[30:31] 1323; 1324; GFX11-LABEL: buffer_load_v2p0__voffset_add: 1325; GFX11: ; %bb.0: 1326; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1327; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 1328; GFX11-NEXT: s_waitcnt vmcnt(0) 1329; GFX11-NEXT: s_setpc_b64 s[30:31] 1330 %voffset.add = add i32 %voffset, 60 1331 %data = call <2 x ptr> @llvm.amdgcn.raw.ptr.buffer.load.v2p0(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) ; overload suffix names the vector-of-pointer return type 1332 ret <2 x ptr> %data 1333} 1334 1335define ptr addrspace(1) @buffer_load_p1__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1336; PREGFX10-LABEL: buffer_load_p1__voffset_add: 1337; PREGFX10: ; %bb.0: 1338; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1339; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1340; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1341; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1342; 1343; GFX10-LABEL: buffer_load_p1__voffset_add: 1344; GFX10: ; %bb.0: 1345; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1346; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1347; GFX10-NEXT: s_waitcnt vmcnt(0) 1348; GFX10-NEXT: s_setpc_b64 s[30:31] 1349; 1350; GFX11-LABEL: buffer_load_p1__voffset_add: 1351; GFX11: ; %bb.0: 1352; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1353; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 1354; GFX11-NEXT: s_waitcnt vmcnt(0) 1355; GFX11-NEXT: s_setpc_b64 s[30:31] 1356 %voffset.add = add i32 %voffset, 60 1357 %data = call ptr addrspace(1) @llvm.amdgcn.raw.ptr.buffer.load.p1(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1358 ret ptr addrspace(1) %data 1359} 1360 1361define <2 x ptr addrspace(1)> @buffer_load_v2p1__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1362; PREGFX10-LABEL: buffer_load_v2p1__voffset_add: 1363; PREGFX10: ; %bb.0: 1364; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1365; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1366; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1367; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1368; 1369; GFX10-LABEL: buffer_load_v2p1__voffset_add: 1370; GFX10: ; %bb.0: 1371; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1372; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1373; GFX10-NEXT: s_waitcnt vmcnt(0) 1374; GFX10-NEXT: s_setpc_b64 s[30:31] 1375; 1376; GFX11-LABEL: buffer_load_v2p1__voffset_add: 1377; GFX11: ; %bb.0: 1378; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1379; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 1380; GFX11-NEXT: s_waitcnt vmcnt(0) 1381; GFX11-NEXT: s_setpc_b64 s[30:31] 1382 %voffset.add = add i32 %voffset, 60 1383 %data = call <2 x ptr addrspace(1)> @llvm.amdgcn.raw.ptr.buffer.load.v2p1(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) ; overload suffix names the vector-of-pointer return type 1384 ret <2 x ptr addrspace(1)> %data 1385} 1386 1387define ptr addrspace(4) @buffer_load_p4__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1388; PREGFX10-LABEL: buffer_load_p4__voffset_add: 1389; PREGFX10: ; %bb.0: 1390; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1391; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1392; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1393; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1394; 1395; GFX10-LABEL:
buffer_load_p4__voffset_add: 1396; GFX10: ; %bb.0: 1397; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1398; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1399; GFX10-NEXT: s_waitcnt vmcnt(0) 1400; GFX10-NEXT: s_setpc_b64 s[30:31] 1401; 1402; GFX11-LABEL: buffer_load_p4__voffset_add: 1403; GFX11: ; %bb.0: 1404; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1405; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 1406; GFX11-NEXT: s_waitcnt vmcnt(0) 1407; GFX11-NEXT: s_setpc_b64 s[30:31] 1408 %voffset.add = add i32 %voffset, 60 1409 %data = call ptr addrspace(4) @llvm.amdgcn.raw.ptr.buffer.load.p4(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1410 ret ptr addrspace(4) %data 1411} 1412 1413define <2 x ptr addrspace(4)> @buffer_load_v2p4__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1414; PREGFX10-LABEL: buffer_load_v2p4__voffset_add: 1415; PREGFX10: ; %bb.0: 1416; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1417; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1418; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1419; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1420; 1421; GFX10-LABEL: buffer_load_v2p4__voffset_add: 1422; GFX10: ; %bb.0: 1423; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1424; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1425; GFX10-NEXT: s_waitcnt vmcnt(0) 1426; GFX10-NEXT: s_setpc_b64 s[30:31] 1427; 1428; GFX11-LABEL: buffer_load_v2p4__voffset_add: 1429; GFX11: ; %bb.0: 1430; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1431; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 1432; GFX11-NEXT: s_waitcnt vmcnt(0) 1433; GFX11-NEXT: s_setpc_b64 s[30:31] 1434 %voffset.add = add i32 %voffset, 60 1435 %data = call <2 x ptr addrspace(4)> @llvm.amdgcn.raw.ptr.buffer.load.v2p4(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) ; overload suffix names the vector-of-pointer return type 1436 ret <2 x ptr addrspace(4)> %data 1437} 1438 1439define ptr addrspace(999)
@buffer_load_p999__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1440; PREGFX10-LABEL: buffer_load_p999__voffset_add: 1441; PREGFX10: ; %bb.0: 1442; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1443; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1444; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1445; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1446; 1447; GFX10-LABEL: buffer_load_p999__voffset_add: 1448; GFX10: ; %bb.0: 1449; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1450; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1451; GFX10-NEXT: s_waitcnt vmcnt(0) 1452; GFX10-NEXT: s_setpc_b64 s[30:31] 1453; 1454; GFX11-LABEL: buffer_load_p999__voffset_add: 1455; GFX11: ; %bb.0: 1456; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1457; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 1458; GFX11-NEXT: s_waitcnt vmcnt(0) 1459; GFX11-NEXT: s_setpc_b64 s[30:31] 1460 %voffset.add = add i32 %voffset, 60 1461 %data = call ptr addrspace(999) @llvm.amdgcn.raw.ptr.buffer.load.p999(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1462 ret ptr addrspace(999) %data 1463} 1464 1465define <2 x ptr addrspace(999)> @buffer_load_v2p999__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1466; PREGFX10-LABEL: buffer_load_v2p999__voffset_add: 1467; PREGFX10: ; %bb.0: 1468; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1469; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1470; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1471; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1472; 1473; GFX10-LABEL: buffer_load_v2p999__voffset_add: 1474; GFX10: ; %bb.0: 1475; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1476; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1477; GFX10-NEXT: s_waitcnt vmcnt(0) 1478; GFX10-NEXT: s_setpc_b64 s[30:31] 1479; 1480; GFX11-LABEL: buffer_load_v2p999__voffset_add: 1481; GFX11: ; %bb.0: 1482; GFX11-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 1483; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 1484; GFX11-NEXT: s_waitcnt vmcnt(0) 1485; GFX11-NEXT: s_setpc_b64 s[30:31] 1486 %voffset.add = add i32 %voffset, 60 1487 %data = call <2 x ptr addrspace(999)> @llvm.amdgcn.raw.ptr.buffer.load.p999(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1488 ret <2 x ptr addrspace(999)> %data 1489} 1490 1491define ptr addrspace(2) @buffer_load_p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1492; PREGFX10-LABEL: buffer_load_p2__voffset_add: 1493; PREGFX10: ; %bb.0: 1494; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1495; PREGFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 0 offen offset:60 1496; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1497; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1498; 1499; GFX10-LABEL: buffer_load_p2__voffset_add: 1500; GFX10: ; %bb.0: 1501; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1502; GFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 0 offen offset:60 1503; GFX10-NEXT: s_waitcnt vmcnt(0) 1504; GFX10-NEXT: s_setpc_b64 s[30:31] 1505; 1506; GFX11-LABEL: buffer_load_p2__voffset_add: 1507; GFX11: ; %bb.0: 1508; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1509; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60 1510; GFX11-NEXT: s_waitcnt vmcnt(0) 1511; GFX11-NEXT: s_setpc_b64 s[30:31] 1512 %voffset.add = add i32 %voffset, 60 1513 %data = call ptr addrspace(2) @llvm.amdgcn.raw.ptr.buffer.load.p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1514 ret ptr addrspace(2) %data 1515} 1516 1517define <2 x ptr addrspace(2)> @buffer_load_v2p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1518; PREGFX10-LABEL: buffer_load_v2p2__voffset_add: 1519; PREGFX10: ; %bb.0: 1520; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1521; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1522; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1523; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1524; 1525; 
GFX10-LABEL: buffer_load_v2p2__voffset_add: 1526; GFX10: ; %bb.0: 1527; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1528; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1529; GFX10-NEXT: s_waitcnt vmcnt(0) 1530; GFX10-NEXT: s_setpc_b64 s[30:31] 1531; 1532; GFX11-LABEL: buffer_load_v2p2__voffset_add: 1533; GFX11: ; %bb.0: 1534; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1535; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 1536; GFX11-NEXT: s_waitcnt vmcnt(0) 1537; GFX11-NEXT: s_setpc_b64 s[30:31] 1538 %voffset.add = add i32 %voffset, 60 1539 %data = call <2 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v2p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1540 ret <2 x ptr addrspace(2)> %data 1541} 1542 1543define <3 x ptr addrspace(2)> @buffer_load_v3p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1544; GFX10-LABEL: buffer_load_v3p2__voffset_add: 1545; GFX10: ; %bb.0: 1546; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1547; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[16:19], 0 offen offset:60 1548; GFX10-NEXT: s_waitcnt vmcnt(0) 1549; GFX10-NEXT: s_setpc_b64 s[30:31] 1550; 1551; GFX11-LABEL: buffer_load_v3p2__voffset_add: 1552; GFX11: ; %bb.0: 1553; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1554; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60 1555; GFX11-NEXT: s_waitcnt vmcnt(0) 1556; GFX11-NEXT: s_setpc_b64 s[30:31] 1557 %voffset.add = add i32 %voffset, 60 1558 %data = call <3 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v3p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1559 ret <3 x ptr addrspace(2)> %data 1560} 1561 1562define <4 x ptr addrspace(2)> @buffer_load_v4p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1563; PREGFX10-LABEL: buffer_load_v4p2__voffset_add: 1564; PREGFX10: ; %bb.0: 1565; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1566; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, 
s[16:19], 0 offen offset:60 1567; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1568; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1569; 1570; GFX10-LABEL: buffer_load_v4p2__voffset_add: 1571; GFX10: ; %bb.0: 1572; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1573; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1574; GFX10-NEXT: s_waitcnt vmcnt(0) 1575; GFX10-NEXT: s_setpc_b64 s[30:31] 1576; 1577; GFX11-LABEL: buffer_load_v4p2__voffset_add: 1578; GFX11: ; %bb.0: 1579; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1580; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 1581; GFX11-NEXT: s_waitcnt vmcnt(0) 1582; GFX11-NEXT: s_setpc_b64 s[30:31] 1583 %voffset.add = add i32 %voffset, 60 1584 %data = call <4 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v4p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1585 ret <4 x ptr addrspace(2)> %data 1586} 1587 1588define ptr addrspace(3) @buffer_load_p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1589; PREGFX10-LABEL: buffer_load_p3__voffset_add: 1590; PREGFX10: ; %bb.0: 1591; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1592; PREGFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 0 offen offset:60 1593; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1594; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1595; 1596; GFX10-LABEL: buffer_load_p3__voffset_add: 1597; GFX10: ; %bb.0: 1598; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1599; GFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 0 offen offset:60 1600; GFX10-NEXT: s_waitcnt vmcnt(0) 1601; GFX10-NEXT: s_setpc_b64 s[30:31] 1602; 1603; GFX11-LABEL: buffer_load_p3__voffset_add: 1604; GFX11: ; %bb.0: 1605; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1606; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60 1607; GFX11-NEXT: s_waitcnt vmcnt(0) 1608; GFX11-NEXT: s_setpc_b64 s[30:31] 1609 %voffset.add = add i32 %voffset, 60 1610 %data = call ptr addrspace(3) @llvm.amdgcn.raw.ptr.buffer.load.p3(ptr addrspace(8) 
%rsrc, i32 %voffset.add, i32 0, i32 0) 1611 ret ptr addrspace(3) %data 1612} 1613 1614define <2 x ptr addrspace(3)> @buffer_load_v2p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1615; PREGFX10-LABEL: buffer_load_v2p3__voffset_add: 1616; PREGFX10: ; %bb.0: 1617; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1618; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1619; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1620; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1621; 1622; GFX10-LABEL: buffer_load_v2p3__voffset_add: 1623; GFX10: ; %bb.0: 1624; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1625; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1626; GFX10-NEXT: s_waitcnt vmcnt(0) 1627; GFX10-NEXT: s_setpc_b64 s[30:31] 1628; 1629; GFX11-LABEL: buffer_load_v2p3__voffset_add: 1630; GFX11: ; %bb.0: 1631; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1632; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 1633; GFX11-NEXT: s_waitcnt vmcnt(0) 1634; GFX11-NEXT: s_setpc_b64 s[30:31] 1635 %voffset.add = add i32 %voffset, 60 1636 %data = call <2 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v2p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1637 ret <2 x ptr addrspace(3)> %data 1638} 1639 1640define <3 x ptr addrspace(3)> @buffer_load_v3p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1641; GFX10-LABEL: buffer_load_v3p3__voffset_add: 1642; GFX10: ; %bb.0: 1643; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1644; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[16:19], 0 offen offset:60 1645; GFX10-NEXT: s_waitcnt vmcnt(0) 1646; GFX10-NEXT: s_setpc_b64 s[30:31] 1647; 1648; GFX11-LABEL: buffer_load_v3p3__voffset_add: 1649; GFX11: ; %bb.0: 1650; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1651; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60 1652; GFX11-NEXT: s_waitcnt vmcnt(0) 1653; GFX11-NEXT: s_setpc_b64 s[30:31] 1654 %voffset.add = add 
i32 %voffset, 60 1655 %data = call <3 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v3p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1656 ret <3 x ptr addrspace(3)> %data 1657} 1658 1659define <4 x ptr addrspace(3)> @buffer_load_v4p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1660; PREGFX10-LABEL: buffer_load_v4p3__voffset_add: 1661; PREGFX10: ; %bb.0: 1662; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1663; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1664; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1665; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1666; 1667; GFX10-LABEL: buffer_load_v4p3__voffset_add: 1668; GFX10: ; %bb.0: 1669; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1670; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1671; GFX10-NEXT: s_waitcnt vmcnt(0) 1672; GFX10-NEXT: s_setpc_b64 s[30:31] 1673; 1674; GFX11-LABEL: buffer_load_v4p3__voffset_add: 1675; GFX11: ; %bb.0: 1676; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1677; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 1678; GFX11-NEXT: s_waitcnt vmcnt(0) 1679; GFX11-NEXT: s_setpc_b64 s[30:31] 1680 %voffset.add = add i32 %voffset, 60 1681 %data = call <4 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v4p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1682 ret <4 x ptr addrspace(3)> %data 1683} 1684 1685define ptr addrspace(5) @buffer_load_p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1686; PREGFX10-LABEL: buffer_load_p5__voffset_add: 1687; PREGFX10: ; %bb.0: 1688; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1689; PREGFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 0 offen offset:60 1690; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1691; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1692; 1693; GFX10-LABEL: buffer_load_p5__voffset_add: 1694; GFX10: ; %bb.0: 1695; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1696; GFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 
0 offen offset:60 1697; GFX10-NEXT: s_waitcnt vmcnt(0) 1698; GFX10-NEXT: s_setpc_b64 s[30:31] 1699; 1700; GFX11-LABEL: buffer_load_p5__voffset_add: 1701; GFX11: ; %bb.0: 1702; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1703; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60 1704; GFX11-NEXT: s_waitcnt vmcnt(0) 1705; GFX11-NEXT: s_setpc_b64 s[30:31] 1706 %voffset.add = add i32 %voffset, 60 1707 %data = call ptr addrspace(5) @llvm.amdgcn.raw.ptr.buffer.load.p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1708 ret ptr addrspace(5) %data 1709} 1710 1711define <2 x ptr addrspace(5)> @buffer_load_v2p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1712; PREGFX10-LABEL: buffer_load_v2p5__voffset_add: 1713; PREGFX10: ; %bb.0: 1714; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1715; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1716; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1717; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1718; 1719; GFX10-LABEL: buffer_load_v2p5__voffset_add: 1720; GFX10: ; %bb.0: 1721; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1722; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1723; GFX10-NEXT: s_waitcnt vmcnt(0) 1724; GFX10-NEXT: s_setpc_b64 s[30:31] 1725; 1726; GFX11-LABEL: buffer_load_v2p5__voffset_add: 1727; GFX11: ; %bb.0: 1728; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1729; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 1730; GFX11-NEXT: s_waitcnt vmcnt(0) 1731; GFX11-NEXT: s_setpc_b64 s[30:31] 1732 %voffset.add = add i32 %voffset, 60 1733 %data = call <2 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v2p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1734 ret <2 x ptr addrspace(5)> %data 1735} 1736 1737define <3 x ptr addrspace(5)> @buffer_load_v3p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1738; GFX10-LABEL: buffer_load_v3p5__voffset_add: 1739; GFX10: ; %bb.0: 1740; GFX10-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1741; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[16:19], 0 offen offset:60 1742; GFX10-NEXT: s_waitcnt vmcnt(0) 1743; GFX10-NEXT: s_setpc_b64 s[30:31] 1744; 1745; GFX11-LABEL: buffer_load_v3p5__voffset_add: 1746; GFX11: ; %bb.0: 1747; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1748; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60 1749; GFX11-NEXT: s_waitcnt vmcnt(0) 1750; GFX11-NEXT: s_setpc_b64 s[30:31] 1751 %voffset.add = add i32 %voffset, 60 1752 %data = call <3 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v3p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1753 ret <3 x ptr addrspace(5)> %data 1754} 1755 1756define <4 x ptr addrspace(5)> @buffer_load_v4p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1757; PREGFX10-LABEL: buffer_load_v4p5__voffset_add: 1758; PREGFX10: ; %bb.0: 1759; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1760; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1761; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1762; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1763; 1764; GFX10-LABEL: buffer_load_v4p5__voffset_add: 1765; GFX10: ; %bb.0: 1766; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1767; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1768; GFX10-NEXT: s_waitcnt vmcnt(0) 1769; GFX10-NEXT: s_setpc_b64 s[30:31] 1770; 1771; GFX11-LABEL: buffer_load_v4p5__voffset_add: 1772; GFX11: ; %bb.0: 1773; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1774; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 1775; GFX11-NEXT: s_waitcnt vmcnt(0) 1776; GFX11-NEXT: s_setpc_b64 s[30:31] 1777 %voffset.add = add i32 %voffset, 60 1778 %data = call <4 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v4p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1779 ret <4 x ptr addrspace(5)> %data 1780} 1781 1782define ptr addrspace(6) @buffer_load_p6__voffset_add(ptr addrspace(8) inreg 
%rsrc, i32 %voffset) { 1783; PREGFX10-LABEL: buffer_load_p6__voffset_add: 1784; PREGFX10: ; %bb.0: 1785; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1786; PREGFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 0 offen offset:60 1787; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1788; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1789; 1790; GFX10-LABEL: buffer_load_p6__voffset_add: 1791; GFX10: ; %bb.0: 1792; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1793; GFX10-NEXT: buffer_load_dword v0, v0, s[16:19], 0 offen offset:60 1794; GFX10-NEXT: s_waitcnt vmcnt(0) 1795; GFX10-NEXT: s_setpc_b64 s[30:31] 1796; 1797; GFX11-LABEL: buffer_load_p6__voffset_add: 1798; GFX11: ; %bb.0: 1799; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1800; GFX11-NEXT: buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60 1801; GFX11-NEXT: s_waitcnt vmcnt(0) 1802; GFX11-NEXT: s_setpc_b64 s[30:31] 1803 %voffset.add = add i32 %voffset, 60 1804 %data = call ptr addrspace(6) @llvm.amdgcn.raw.ptr.buffer.load.p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1805 ret ptr addrspace(6) %data 1806} 1807 1808define <2 x ptr addrspace(6)> @buffer_load_v2p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1809; PREGFX10-LABEL: buffer_load_v2p6__voffset_add: 1810; PREGFX10: ; %bb.0: 1811; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1812; PREGFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1813; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1814; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1815; 1816; GFX10-LABEL: buffer_load_v2p6__voffset_add: 1817; GFX10: ; %bb.0: 1818; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1819; GFX10-NEXT: buffer_load_dwordx2 v[0:1], v0, s[16:19], 0 offen offset:60 1820; GFX10-NEXT: s_waitcnt vmcnt(0) 1821; GFX10-NEXT: s_setpc_b64 s[30:31] 1822; 1823; GFX11-LABEL: buffer_load_v2p6__voffset_add: 1824; GFX11: ; %bb.0: 1825; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1826; GFX11-NEXT: buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60 
1827; GFX11-NEXT: s_waitcnt vmcnt(0) 1828; GFX11-NEXT: s_setpc_b64 s[30:31] 1829 %voffset.add = add i32 %voffset, 60 1830 %data = call <2 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v2p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1831 ret <2 x ptr addrspace(6)> %data 1832} 1833 1834define <3 x ptr addrspace(6)> @buffer_load_v3p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1835; GFX10-LABEL: buffer_load_v3p6__voffset_add: 1836; GFX10: ; %bb.0: 1837; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1838; GFX10-NEXT: buffer_load_dwordx3 v[0:2], v0, s[16:19], 0 offen offset:60 1839; GFX10-NEXT: s_waitcnt vmcnt(0) 1840; GFX10-NEXT: s_setpc_b64 s[30:31] 1841; 1842; GFX11-LABEL: buffer_load_v3p6__voffset_add: 1843; GFX11: ; %bb.0: 1844; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1845; GFX11-NEXT: buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60 1846; GFX11-NEXT: s_waitcnt vmcnt(0) 1847; GFX11-NEXT: s_setpc_b64 s[30:31] 1848 %voffset.add = add i32 %voffset, 60 1849 %data = call <3 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v3p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1850 ret <3 x ptr addrspace(6)> %data 1851} 1852 1853define <4 x ptr addrspace(6)> @buffer_load_v4p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) { 1854; PREGFX10-LABEL: buffer_load_v4p6__voffset_add: 1855; PREGFX10: ; %bb.0: 1856; PREGFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1857; PREGFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1858; PREGFX10-NEXT: s_waitcnt vmcnt(0) 1859; PREGFX10-NEXT: s_setpc_b64 s[30:31] 1860; 1861; GFX10-LABEL: buffer_load_v4p6__voffset_add: 1862; GFX10: ; %bb.0: 1863; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1864; GFX10-NEXT: buffer_load_dwordx4 v[0:3], v0, s[16:19], 0 offen offset:60 1865; GFX10-NEXT: s_waitcnt vmcnt(0) 1866; GFX10-NEXT: s_setpc_b64 s[30:31] 1867; 1868; GFX11-LABEL: buffer_load_v4p6__voffset_add: 1869; GFX11: ; %bb.0: 1870; 
GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1871; GFX11-NEXT: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60 1872; GFX11-NEXT: s_waitcnt vmcnt(0) 1873; GFX11-NEXT: s_setpc_b64 s[30:31] 1874 %voffset.add = add i32 %voffset, 60 1875 %data = call <4 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v4p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1876 ret <4 x ptr addrspace(6)> %data 1877} 1878 1879declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #0 1880declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32) #0 1881declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #0 1882declare i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8), i32, i32, i32) #0 1883declare <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8), i32, i32, i32) #0 1884declare <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8), i32, i32, i32) #0 1885declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 1886declare i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8), i32, i32, i32) #0 1887declare i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8), i32, i32, i32) #0 1888declare <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8), i32, i32, i32) #0 1889declare <4 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v4i16(ptr addrspace(8), i32, i32, i32) #0 1890declare half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8), i32, i32, i32) #0 1891declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8), i32, i32, i32) #0 1892declare <4 x half> @llvm.amdgcn.raw.ptr.buffer.load.v4f16(ptr addrspace(8), i32, i32, i32) #0 1893attributes #0 = { nounwind readonly } 1894