; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck -check-prefix=VERDE %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s

; Code generation tests for the llvm.amdgcn.raw.ptr.buffer.store.* intrinsics.
; Each function is compiled for two targets: verde (assertions under the VERDE
; prefix) and tonga (assertions under the default prefix).

; The last operand of the three calls is 0, 1 and 2; the expected stores carry
; no cache flag, glc and slc respectively.
define amdgpu_ps void @buffer_store(ptr addrspace(8) inreg, <4 x float>, <4 x float>, <4 x float>) {
; VERDE-LABEL: buffer_store:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VERDE-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 glc
; VERDE-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 slc
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; CHECK-NEXT:    buffer_store_dwordx4 v[4:7], off, s[0:3], 0 glc
; CHECK-NEXT:    buffer_store_dwordx4 v[8:11], off, s[0:3], 0 slc
; CHECK-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %1, ptr addrspace(8) %0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %2, ptr addrspace(8) %0, i32 0, i32 0, i32 1)
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %3, ptr addrspace(8) %0, i32 0, i32 0, i32 2)
  ret void
}

; A constant offset operand (42) is expected to appear as the instruction's
; immediate offset.
define amdgpu_ps void @buffer_store_immoffs(ptr addrspace(8) inreg, <4 x float>) {
; VERDE-LABEL: buffer_store_immoffs:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:42
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_immoffs:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:42
; CHECK-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %1, ptr addrspace(8) %0, i32 42, i32 0, i32 0)
  ret void
}

; A variable offset operand is expected to be passed in a VGPR with offen.
define amdgpu_ps void @buffer_store_ofs(ptr addrspace(8) inreg, <4 x float>, i32) {
; VERDE-LABEL: buffer_store_ofs:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_ofs:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %1, ptr addrspace(8) %0, i32 %2, i32 0, i32 0)
  ret void
}

; Store, load into the same registers, then store the loaded data.
; Ideally, the register allocator would avoid the wait here
define amdgpu_ps void @buffer_store_wait(ptr addrspace(8) inreg, <4 x float>, i32, i32, i32) {
; VERDE-LABEL: buffer_store_wait:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
; VERDE-NEXT:    s_waitcnt expcnt(0)
; VERDE-NEXT:    buffer_load_dwordx4 v[0:3], v5, s[0:3], 0 offen
; VERDE-NEXT:    s_waitcnt vmcnt(0)
; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v6, s[0:3], 0 offen
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_wait:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
; CHECK-NEXT:    buffer_load_dwordx4 v[0:3], v5, s[0:3], 0 offen
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v6, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %1, ptr addrspace(8) %0, i32 %2, i32 0, i32 0)
  %data = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) %0, i32 %3, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> %data, ptr addrspace(8) %0, i32 %4, i32 0, i32 0)
  ret void
}

; Single float store with a variable offset.
define amdgpu_ps void @buffer_store_x1(ptr addrspace(8) inreg %rsrc, float %data, i32 %offset) {
; VERDE-LABEL: buffer_store_x1:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_x1:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

; <2 x float> store with a variable offset.
define amdgpu_ps void @buffer_store_x2(ptr addrspace(8) inreg %rsrc, <2 x float> %data, i32 %offset) #0 {
; VERDE-LABEL: buffer_store_x2:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_x2:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

; Six float stores at %a + 4, 8, 12, 16 and %a + 28, 32 are expected to be
; emitted as one dwordx4 store and one dwordx2 store.
define amdgpu_ps void @buffer_store_x1_offen_merged_and(ptr addrspace(8) inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
; VERDE-LABEL: buffer_store_x1_offen_merged_and:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
; VERDE-NEXT:    buffer_store_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_x1_offen_merged_and:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
; CHECK-NEXT:    buffer_store_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
; CHECK-NEXT:    s_endpgm
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 8
  %a3 = add i32 %a, 12
  %a4 = add i32 %a, 16
  %a5 = add i32 %a, 28
  %a6 = add i32 %a, 32
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v1, ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v2, ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v3, ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v4, ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v5, ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v6, ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 0)
  ret void
}

; Same store pattern as buffer_store_x1_offen_merged_and, but the base offset
; is %inp shifted left by 6.
define amdgpu_ps void @buffer_store_x1_offen_merged_or(ptr addrspace(8) inreg %rsrc, i32 %inp, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
; VERDE-LABEL: buffer_store_x1_offen_merged_or:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    v_lshlrev_b32_e32 v0, 6, v0
; VERDE-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
; VERDE-NEXT:    buffer_store_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_x1_offen_merged_or:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 6, v0
; CHECK-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
; CHECK-NEXT:    buffer_store_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28
; CHECK-NEXT:    s_endpgm
  %a = shl i32 %inp, 6
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 8
  %a3 = add i32 %a, 12
  %a4 = add i32 %a, 16
  %a5 = add i32 %a, 28
  %a6 = add i32 %a, 32
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v1, ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v2, ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v3, ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v4, ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v5, ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v6, ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 0)
  ret void
}

; The six stores use last operand 0, 0, 1, 1, 3, 3. The expected output is three
; dwordx2 stores: one without cache flags, one with glc and one with glc slc.
define amdgpu_ps void @buffer_store_x1_offen_merged_glc_slc(ptr addrspace(8) inreg %rsrc, i32 %a, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
; VERDE-LABEL: buffer_store_x1_offen_merged_glc_slc:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    buffer_store_dwordx2 v[1:2], v0, s[0:3], 0 offen offset:4
; VERDE-NEXT:    buffer_store_dwordx2 v[3:4], v0, s[0:3], 0 offen offset:12 glc
; VERDE-NEXT:    buffer_store_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_x1_offen_merged_glc_slc:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    buffer_store_dwordx2 v[1:2], v0, s[0:3], 0 offen offset:4
; CHECK-NEXT:    buffer_store_dwordx2 v[3:4], v0, s[0:3], 0 offen offset:12 glc
; CHECK-NEXT:    buffer_store_dwordx2 v[5:6], v0, s[0:3], 0 offen offset:28 glc slc
; CHECK-NEXT:    s_endpgm
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 8
  %a3 = add i32 %a, 12
  %a4 = add i32 %a, 16
  %a5 = add i32 %a, 28
  %a6 = add i32 %a, 32
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v1, ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v2, ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v3, ptr addrspace(8) %rsrc, i32 %a3, i32 0, i32 1)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v4, ptr addrspace(8) %rsrc, i32 %a4, i32 0, i32 1)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v5, ptr addrspace(8) %rsrc, i32 %a5, i32 0, i32 3)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v6, ptr addrspace(8) %rsrc, i32 %a6, i32 0, i32 3)
  ret void
}

; Two <2 x float> stores at %a + 4 and %a + 12 are expected to be emitted as a
; single dwordx4 store.
define amdgpu_ps void @buffer_store_x2_offen_merged_and(ptr addrspace(8) inreg %rsrc, i32 %a, <2 x float> %v1, <2 x float> %v2) {
; VERDE-LABEL: buffer_store_x2_offen_merged_and:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_x2_offen_merged_and:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
; CHECK-NEXT:    s_endpgm
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 12
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %v1, ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %v2, ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
  ret void
}

; Same store pattern as buffer_store_x2_offen_merged_and, but the base offset
; is %inp shifted left by 4.
define amdgpu_ps void @buffer_store_x2_offen_merged_or(ptr addrspace(8) inreg %rsrc, i32 %inp, <2 x float> %v1, <2 x float> %v2) {
; VERDE-LABEL: buffer_store_x2_offen_merged_or:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
; VERDE-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_x2_offen_merged_or:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    v_lshlrev_b32_e32 v0, 4, v0
; CHECK-NEXT:    buffer_store_dwordx4 v[1:4], v0, s[0:3], 0 offen offset:4
; CHECK-NEXT:    s_endpgm
  %a = shl i32 %inp, 4
  %a1 = add i32 %a, 4
  %a2 = add i32 %a, 12
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %v1, ptr addrspace(8) %rsrc, i32 %a1, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %v2, ptr addrspace(8) %rsrc, i32 %a2, i32 0, i32 0)
  ret void
}

; Six float stores at constant offsets 4, 8, 12, 16 and 28, 32 are expected to
; be emitted as one dwordx4 store and one dwordx2 store.
define amdgpu_ps void @buffer_store_x1_offset_merged(ptr addrspace(8) inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
; VERDE-LABEL: buffer_store_x1_offset_merged:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:4
; VERDE-NEXT:    buffer_store_dwordx2 v[4:5], off, s[0:3], 0 offset:28
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_x1_offset_merged:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:4
; CHECK-NEXT:    buffer_store_dwordx2 v[4:5], off, s[0:3], 0 offset:28
; CHECK-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v1, ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v2, ptr addrspace(8) %rsrc, i32 8, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v3, ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v4, ptr addrspace(8) %rsrc, i32 16, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v5, ptr addrspace(8) %rsrc, i32 28, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v6, ptr addrspace(8) %rsrc, i32 32, i32 0, i32 0)
  ret void
}

; Two <2 x float> stores at constant offsets 4 and 12 are expected to be emitted
; as a single dwordx4 store.
define amdgpu_ps void @buffer_store_x2_offset_merged(ptr addrspace(8) inreg %rsrc, <2 x float> %v1, <2 x float> %v2) {
; VERDE-LABEL: buffer_store_x2_offset_merged:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:4
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_x2_offset_merged:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:4
; CHECK-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %v1, ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> %v2, ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
  ret void
}

; Integer variants: <4 x i32>, <2 x i32> and i32 data with last operand 0, 1
; and 2.
define amdgpu_ps void @buffer_store_int(ptr addrspace(8) inreg, <4 x i32>, <2 x i32>, i32) {
; VERDE-LABEL: buffer_store_int:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VERDE-NEXT:    buffer_store_dwordx2 v[4:5], off, s[0:3], 0 glc
; VERDE-NEXT:    buffer_store_dword v6, off, s[0:3], 0 slc
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_int:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; CHECK-NEXT:    buffer_store_dwordx2 v[4:5], off, s[0:3], 0 glc
; CHECK-NEXT:    buffer_store_dword v6, off, s[0:3], 0 slc
; CHECK-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> %1, ptr addrspace(8) %0, i32 0, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> %2, ptr addrspace(8) %0, i32 0, i32 0, i32 1)
  call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 %3, ptr addrspace(8) %0, i32 0, i32 0, i32 2)
  ret void
}

; i8 data (float converted to i32, then truncated) is expected to use
; buffer_store_byte.
define amdgpu_ps void @raw_ptr_buffer_store_byte(ptr addrspace(8) inreg %rsrc, float %v1) {
; VERDE-LABEL: raw_ptr_buffer_store_byte:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    v_cvt_u32_f32_e32 v0, v0
; VERDE-NEXT:    buffer_store_byte v0, off, s[0:3], 0
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: raw_ptr_buffer_store_byte:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
; CHECK-NEXT:    buffer_store_byte v0, off, s[0:3], 0
; CHECK-NEXT:    s_endpgm
main_body:
  %v2 = fptoui float %v1 to i32
  %v3 = trunc i32 %v2 to i8
  call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 %v3, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret void
}

; i16 data (float converted to i32, then truncated) is expected to use
; buffer_store_short.
define amdgpu_ps void @raw_ptr_buffer_store_short(ptr addrspace(8) inreg %rsrc, float %v1) {
; VERDE-LABEL: raw_ptr_buffer_store_short:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    v_cvt_u32_f32_e32 v0, v0
; VERDE-NEXT:    buffer_store_short v0, off, s[0:3], 0
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: raw_ptr_buffer_store_short:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
; CHECK-NEXT:    buffer_store_short v0, off, s[0:3], 0
; CHECK-NEXT:    s_endpgm
main_body:
  %v2 = fptoui float %v1 to i32
  %v3 = trunc i32 %v2 to i16
  call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 %v3, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret void
}

; half data (i32 truncated to i16 and bitcast) is expected to use
; buffer_store_short.
define amdgpu_ps void @raw_ptr_buffer_store_f16(ptr addrspace(8) inreg %rsrc, i32 %v1) {
; VERDE-LABEL: raw_ptr_buffer_store_f16:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    buffer_store_short v0, off, s[0:3], 0
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: raw_ptr_buffer_store_f16:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_store_short v0, off, s[0:3], 0
; CHECK-NEXT:    s_endpgm
main_body:
  %trunc = trunc i32 %v1 to i16
  %cast = bitcast i16 %trunc to half
  call void @llvm.amdgcn.raw.ptr.buffer.store.f16(half %cast, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret void
}

; Vectors of 16-bit elements (half, bfloat, i16). In the expected verde output
; the elements are first converted and/or packed into dwords; in the expected
; tonga output the incoming registers are stored directly.
define amdgpu_ps void @buffer_store_v2f16(ptr addrspace(8) inreg %rsrc, <2 x half> %data, i32 %offset) {
; VERDE-LABEL: buffer_store_v2f16:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; VERDE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; VERDE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; VERDE-NEXT:    v_or_b32_e32 v0, v0, v1
; VERDE-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_v2f16:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @buffer_store_v4f16(ptr addrspace(8) inreg %rsrc, <4 x half> %data, i32 %offset) #0 {
; VERDE-LABEL: buffer_store_v4f16:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    v_cvt_f16_f32_e32 v3, v3
; VERDE-NEXT:    v_cvt_f16_f32_e32 v2, v2
; VERDE-NEXT:    v_cvt_f16_f32_e32 v5, v1
; VERDE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; VERDE-NEXT:    v_lshlrev_b32_e32 v1, 16, v3
; VERDE-NEXT:    v_or_b32_e32 v1, v2, v1
; VERDE-NEXT:    v_lshlrev_b32_e32 v2, 16, v5
; VERDE-NEXT:    v_or_b32_e32 v0, v0, v2
; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v4, s[0:3], 0 offen
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_v4f16:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4f16(<4 x half> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @buffer_store_v8f16(ptr addrspace(8) inreg %rsrc, <8 x half> %data, i32 %offset) #0 {
; VERDE-LABEL: buffer_store_v8f16:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    v_cvt_f16_f32_e32 v7, v7
; VERDE-NEXT:    v_cvt_f16_f32_e32 v6, v6
; VERDE-NEXT:    v_cvt_f16_f32_e32 v9, v5
; VERDE-NEXT:    v_cvt_f16_f32_e32 v3, v3
; VERDE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; VERDE-NEXT:    v_cvt_f16_f32_e32 v4, v4
; VERDE-NEXT:    v_cvt_f16_f32_e32 v2, v2
; VERDE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; VERDE-NEXT:    v_lshlrev_b32_e32 v5, 16, v7
; VERDE-NEXT:    v_or_b32_e32 v5, v6, v5
; VERDE-NEXT:    v_lshlrev_b32_e32 v6, 16, v9
; VERDE-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; VERDE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; VERDE-NEXT:    v_or_b32_e32 v4, v4, v6
; VERDE-NEXT:    v_or_b32_e32 v3, v2, v3
; VERDE-NEXT:    v_or_b32_e32 v2, v0, v1
; VERDE-NEXT:    buffer_store_dwordx4 v[2:5], v8, s[0:3], 0 offen
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_v8f16:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.store.v8f16(<8 x half> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @buffer_store_v2bf16(ptr addrspace(8) inreg %rsrc, <2 x bfloat> %data, i32 %offset) {
; VERDE-LABEL: buffer_store_v2bf16:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    v_mul_f32_e32 v1, 1.0, v1
; VERDE-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
; VERDE-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; VERDE-NEXT:    v_alignbit_b32 v0, v1, v0, 16
; VERDE-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_v2bf16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2bf16(<2 x bfloat> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @buffer_store_v4bf16(ptr addrspace(8) inreg %rsrc, <4 x bfloat> %data, i32 %offset) #0 {
; VERDE-LABEL: buffer_store_v4bf16:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    v_mul_f32_e32 v3, 1.0, v3
; VERDE-NEXT:    v_mul_f32_e32 v1, 1.0, v1
; VERDE-NEXT:    v_lshrrev_b32_e32 v3, 16, v3
; VERDE-NEXT:    v_mul_f32_e32 v2, 1.0, v2
; VERDE-NEXT:    v_lshrrev_b32_e32 v1, 16, v1
; VERDE-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; VERDE-NEXT:    v_alignbit_b32 v2, v3, v2, 16
; VERDE-NEXT:    v_alignbit_b32 v1, v1, v0, 16
; VERDE-NEXT:    buffer_store_dwordx2 v[1:2], v4, s[0:3], 0 offen
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_v4bf16:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4bf16(<4 x bfloat> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

; i16 data truncated from an i32 argument is expected to use buffer_store_short.
define amdgpu_ps void @raw_ptr_buffer_store_i16(ptr addrspace(8) inreg %rsrc, i32 %v1) {
; VERDE-LABEL: raw_ptr_buffer_store_i16:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    buffer_store_short v0, off, s[0:3], 0
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: raw_ptr_buffer_store_i16:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_store_short v0, off, s[0:3], 0
; CHECK-NEXT:    s_endpgm
main_body:
  %trunc = trunc i32 %v1 to i16
  call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 %trunc, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @buffer_store_v2i16(ptr addrspace(8) inreg %rsrc, <2 x i16> %data, i32 %offset) {
; VERDE-LABEL: buffer_store_v2i16:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; VERDE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; VERDE-NEXT:    v_or_b32_e32 v0, v0, v1
; VERDE-NEXT:    buffer_store_dword v0, v2, s[0:3], 0 offen
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_v2i16:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_store_dword v0, v1, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2i16(<2 x i16> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @buffer_store_v4i16(ptr addrspace(8) inreg %rsrc, <4 x i16> %data, i32 %offset) #0 {
; VERDE-LABEL: buffer_store_v4i16:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; VERDE-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; VERDE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; VERDE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; VERDE-NEXT:    v_or_b32_e32 v2, v2, v3
; VERDE-NEXT:    v_or_b32_e32 v1, v0, v1
; VERDE-NEXT:    buffer_store_dwordx2 v[1:2], v4, s[0:3], 0 offen
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_v4i16:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.store.v4i16(<4 x i16> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

; FIXME: The <6 x i16> case below is disabled. Presumably this type is not yet
; handled for buffer stores (to be confirmed); re-enable it once it is.
; define amdgpu_ps void @buffer_store_v6i16(ptr addrspace(8) inreg %rsrc, <6 x i16> %data, i32 %offset) #0 {
; main_body:
;   call void @llvm.amdgcn.raw.ptr.buffer.store.v6i16(<6 x i16> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
;   ret void
; }

define amdgpu_ps void @buffer_store_v8i16(ptr addrspace(8) inreg %rsrc, <8 x i16> %data, i32 %offset) #0 {
; VERDE-LABEL: buffer_store_v8i16:
; VERDE:       ; %bb.0: ; %main_body
; VERDE-NEXT:    v_lshlrev_b32_e32 v7, 16, v7
; VERDE-NEXT:    v_and_b32_e32 v6, 0xffff, v6
; VERDE-NEXT:    v_lshlrev_b32_e32 v5, 16, v5
; VERDE-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; VERDE-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; VERDE-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; VERDE-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; VERDE-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; VERDE-NEXT:    v_or_b32_e32 v6, v6, v7
; VERDE-NEXT:    v_or_b32_e32 v5, v4, v5
; VERDE-NEXT:    v_or_b32_e32 v4, v2, v3
; VERDE-NEXT:    v_or_b32_e32 v3, v0, v1
; VERDE-NEXT:    buffer_store_dwordx4 v[3:6], v8, s[0:3], 0 offen
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: buffer_store_v8i16:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
main_body:
  call void @llvm.amdgcn.raw.ptr.buffer.store.v8i16(<8 x i16> %data, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0)
  ret void
}

; Six float stores at constant offsets with last operand 0 are expected to be
; emitted as one dwordx4 store and one dwordx2 store.
define amdgpu_ps void @raw_ptr_buffer_store_x1_offset_merged(ptr addrspace(8) inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
; VERDE-LABEL: raw_ptr_buffer_store_x1_offset_merged:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:4
; VERDE-NEXT:    buffer_store_dwordx2 v[4:5], off, s[0:3], 0 offset:28
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: raw_ptr_buffer_store_x1_offset_merged:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], off, s[0:3], 0 offset:4
; CHECK-NEXT:    buffer_store_dwordx2 v[4:5], off, s[0:3], 0 offset:28
; CHECK-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v1, ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v2, ptr addrspace(8) %rsrc, i32 8, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v3, ptr addrspace(8) %rsrc, i32 12, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v4, ptr addrspace(8) %rsrc, i32 16, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v5, ptr addrspace(8) %rsrc, i32 28, i32 0, i32 0)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v6, ptr addrspace(8) %rsrc, i32 32, i32 0, i32 0)
  ret void
}

; The same six stores with last operand 8 (going by the function name, the
; swizzled case) are expected to stay as six separate dword stores.
define amdgpu_ps void @raw_ptr_buffer_store_x1_offset_swizzled_not_merged(ptr addrspace(8) inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
; VERDE-LABEL: raw_ptr_buffer_store_x1_offset_swizzled_not_merged:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; VERDE-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:8
; VERDE-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:12
; VERDE-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:16
; VERDE-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:28
; VERDE-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:32
; VERDE-NEXT:    s_endpgm
;
; CHECK-LABEL: raw_ptr_buffer_store_x1_offset_swizzled_not_merged:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; CHECK-NEXT:    buffer_store_dword v1, off, s[0:3], 0 offset:8
; CHECK-NEXT:    buffer_store_dword v2, off, s[0:3], 0 offset:12
; CHECK-NEXT:    buffer_store_dword v3, off, s[0:3], 0 offset:16
; CHECK-NEXT:    buffer_store_dword v4, off, s[0:3], 0 offset:28
; CHECK-NEXT:    buffer_store_dword v5, off, s[0:3], 0 offset:32
; CHECK-NEXT:    s_endpgm
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v1, ptr addrspace(8) %rsrc, i32 4, i32 0, i32 8)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v2, ptr addrspace(8) %rsrc, i32 8, i32 0, i32 8)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v3, ptr addrspace(8) %rsrc, i32 12, i32 0, i32 8)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v4, ptr addrspace(8) %rsrc, i32 16, i32 0, i32 8)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v5, ptr addrspace(8) %rsrc, i32 28, i32 0, i32 8)
  call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %v6, ptr addrspace(8) %rsrc, i32 32, i32 0, i32 8)
  ret void
}

; The *__voffset_add functions below store 64-bit and 128-bit data types
; (double, i64, pointers in several address spaces, and 2-element vectors of
; those) at %voffset + 60. The constant 60 is expected to appear as the
; instruction's immediate offset, with %voffset passed via offen.
define void @buffer_store_f64__voffset_add(ptr addrspace(8) inreg %rsrc, double %data, i32 %voffset) #0 {
; VERDE-LABEL: buffer_store_f64__voffset_add:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    s_setpc_b64 s[30:31]
;
; CHECK-LABEL: buffer_store_f64__voffset_add:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 60
  call void @llvm.amdgcn.raw.ptr.buffer.store.f64(double %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
  ret void
}

define void @buffer_store_v2f64__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x double> %data, i32 %voffset) #0 {
; VERDE-LABEL: buffer_store_v2f64__voffset_add:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    s_setpc_b64 s[30:31]
;
; CHECK-LABEL: buffer_store_v2f64__voffset_add:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 60
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2f64(<2 x double> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
  ret void
}

define void @buffer_store_i64__voffset_add(ptr addrspace(8) inreg %rsrc, i64 %data, i32 %voffset) #0 {
; VERDE-LABEL: buffer_store_i64__voffset_add:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    s_setpc_b64 s[30:31]
;
; CHECK-LABEL: buffer_store_i64__voffset_add:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 60
  call void @llvm.amdgcn.raw.ptr.buffer.store.i64(i64 %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
  ret void
}

define void @buffer_store_v2i64__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x i64> %data, i32 %voffset) #0 {
; VERDE-LABEL: buffer_store_v2i64__voffset_add:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    s_setpc_b64 s[30:31]
;
; CHECK-LABEL: buffer_store_v2i64__voffset_add:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 60
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2i64(<2 x i64> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
  ret void
}

define void @buffer_store_p0__voffset_add(ptr addrspace(8) inreg %rsrc, ptr %data, i32 %voffset) #0 {
; VERDE-LABEL: buffer_store_p0__voffset_add:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    s_setpc_b64 s[30:31]
;
; CHECK-LABEL: buffer_store_p0__voffset_add:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 60
  call void @llvm.amdgcn.raw.ptr.buffer.store.p0(ptr %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
  ret void
}

define void @buffer_store_v2p0__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr> %data, i32 %voffset) #0 {
; VERDE-LABEL: buffer_store_v2p0__voffset_add:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    s_setpc_b64 s[30:31]
;
; CHECK-LABEL: buffer_store_v2p0__voffset_add:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 60
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p0(<2 x ptr> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
  ret void
}

define void @buffer_store_p1__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %data, i32 %voffset) #0 {
; VERDE-LABEL: buffer_store_p1__voffset_add:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    s_setpc_b64 s[30:31]
;
; CHECK-LABEL: buffer_store_p1__voffset_add:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 60
  call void @llvm.amdgcn.raw.ptr.buffer.store.p1(ptr addrspace(1) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
  ret void
}

define void @buffer_store_v2p1__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(1)> %data, i32 %voffset) #0 {
; VERDE-LABEL: buffer_store_v2p1__voffset_add:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    s_setpc_b64 s[30:31]
;
; CHECK-LABEL: buffer_store_v2p1__voffset_add:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 60
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p1(<2 x ptr addrspace(1)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
  ret void
}

define void @buffer_store_p4__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(4) %data, i32 %voffset) #0 {
; VERDE-LABEL: buffer_store_p4__voffset_add:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    s_setpc_b64 s[30:31]
;
; CHECK-LABEL: buffer_store_p4__voffset_add:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 60
  call void @llvm.amdgcn.raw.ptr.buffer.store.p4(ptr addrspace(4) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
  ret void
}

define void @buffer_store_v2p4__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(4)> %data, i32 %voffset) #0 {
; VERDE-LABEL: buffer_store_v2p4__voffset_add:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    s_setpc_b64 s[30:31]
;
; CHECK-LABEL: buffer_store_v2p4__voffset_add:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 60
  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p4(<2 x ptr addrspace(4)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
  ret void
}

define void @buffer_store_p999__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(999) %data, i32 %voffset) #0 {
; VERDE-LABEL: buffer_store_p999__voffset_add:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VERDE-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    s_setpc_b64 s[30:31]
;
; CHECK-LABEL: buffer_store_p999__voffset_add:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT:    buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60
; CHECK-NEXT:    s_waitcnt vmcnt(0)
; CHECK-NEXT:    s_setpc_b64 s[30:31]
  %voffset.add = add i32 %voffset, 60
  call void @llvm.amdgcn.raw.ptr.buffer.store.p999(ptr addrspace(999) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
  ret void
}

define void @buffer_store_v2p999__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(999)> %data, i32 %voffset) #0 {
; VERDE-LABEL: buffer_store_v2p999__voffset_add:
; VERDE:       ; %bb.0:
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VERDE-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60
; VERDE-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; VERDE-NEXT:    s_setpc_b64 
s[30:31] 814; 815; CHECK-LABEL: buffer_store_v2p999__voffset_add: 816; CHECK: ; %bb.0: 817; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 818; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60 819; CHECK-NEXT: s_waitcnt vmcnt(0) 820; CHECK-NEXT: s_setpc_b64 s[30:31] 821 %voffset.add = add i32 %voffset, 60 822 call void @llvm.amdgcn.raw.ptr.buffer.store.v2p999(<2 x ptr addrspace(999)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 823 ret void 824} 825 826define void @buffer_store_p2__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(2) %data, i32 %voffset) #0 { 827; VERDE-LABEL: buffer_store_p2__voffset_add: 828; VERDE: ; %bb.0: 829; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 830; VERDE-NEXT: buffer_store_dword v0, v1, s[16:19], 0 offen offset:60 831; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 832; VERDE-NEXT: s_setpc_b64 s[30:31] 833; 834; CHECK-LABEL: buffer_store_p2__voffset_add: 835; CHECK: ; %bb.0: 836; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 837; CHECK-NEXT: buffer_store_dword v0, v1, s[16:19], 0 offen offset:60 838; CHECK-NEXT: s_waitcnt vmcnt(0) 839; CHECK-NEXT: s_setpc_b64 s[30:31] 840 %voffset.add = add i32 %voffset, 60 841 call void @llvm.amdgcn.raw.ptr.buffer.store.p2(ptr addrspace(2) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 842 ret void 843} 844 845define void @buffer_store_v2p2__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(2)> %data, i32 %voffset) #0 { 846; VERDE-LABEL: buffer_store_v2p2__voffset_add: 847; VERDE: ; %bb.0: 848; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 849; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60 850; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 851; VERDE-NEXT: s_setpc_b64 s[30:31] 852; 853; CHECK-LABEL: buffer_store_v2p2__voffset_add: 854; CHECK: ; %bb.0: 855; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 856; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60 
857; CHECK-NEXT: s_waitcnt vmcnt(0) 858; CHECK-NEXT: s_setpc_b64 s[30:31] 859 %voffset.add = add i32 %voffset, 60 860 call void @llvm.amdgcn.raw.ptr.buffer.store.v2p2(<2 x ptr addrspace(2)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 861 ret void 862} 863 864define void @buffer_store_v3p2__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(2)> %data, i32 %voffset) #0 { 865; VERDE-LABEL: buffer_store_v3p2__voffset_add: 866; VERDE: ; %bb.0: 867; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 868; VERDE-NEXT: buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60 869; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 870; VERDE-NEXT: s_setpc_b64 s[30:31] 871; 872; CHECK-LABEL: buffer_store_v3p2__voffset_add: 873; CHECK: ; %bb.0: 874; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 875; CHECK-NEXT: buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60 876; CHECK-NEXT: s_waitcnt vmcnt(0) 877; CHECK-NEXT: s_setpc_b64 s[30:31] 878 %voffset.add = add i32 %voffset, 60 879 call void @llvm.amdgcn.raw.ptr.buffer.store.v3p2(<3 x ptr addrspace(2)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 880 ret void 881} 882 883define void @buffer_store_v4p2__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(2)> %data, i32 %voffset) #0 { 884; VERDE-LABEL: buffer_store_v4p2__voffset_add: 885; VERDE: ; %bb.0: 886; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 887; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60 888; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 889; VERDE-NEXT: s_setpc_b64 s[30:31] 890; 891; CHECK-LABEL: buffer_store_v4p2__voffset_add: 892; CHECK: ; %bb.0: 893; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 894; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60 895; CHECK-NEXT: s_waitcnt vmcnt(0) 896; CHECK-NEXT: s_setpc_b64 s[30:31] 897 %voffset.add = add i32 %voffset, 60 898 call void @llvm.amdgcn.raw.ptr.buffer.store.v4p2(<4 x ptr addrspace(2)> 
%data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 899 ret void 900} 901 902define void @buffer_store_p3__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %data, i32 %voffset) #0 { 903; VERDE-LABEL: buffer_store_p3__voffset_add: 904; VERDE: ; %bb.0: 905; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 906; VERDE-NEXT: buffer_store_dword v0, v1, s[16:19], 0 offen offset:60 907; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 908; VERDE-NEXT: s_setpc_b64 s[30:31] 909; 910; CHECK-LABEL: buffer_store_p3__voffset_add: 911; CHECK: ; %bb.0: 912; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 913; CHECK-NEXT: buffer_store_dword v0, v1, s[16:19], 0 offen offset:60 914; CHECK-NEXT: s_waitcnt vmcnt(0) 915; CHECK-NEXT: s_setpc_b64 s[30:31] 916 %voffset.add = add i32 %voffset, 60 917 call void @llvm.amdgcn.raw.ptr.buffer.store.p3(ptr addrspace(3) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 918 ret void 919} 920 921define void @buffer_store_v2p3__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(3)> %data, i32 %voffset) #0 { 922; VERDE-LABEL: buffer_store_v2p3__voffset_add: 923; VERDE: ; %bb.0: 924; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 925; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60 926; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 927; VERDE-NEXT: s_setpc_b64 s[30:31] 928; 929; CHECK-LABEL: buffer_store_v2p3__voffset_add: 930; CHECK: ; %bb.0: 931; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 932; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60 933; CHECK-NEXT: s_waitcnt vmcnt(0) 934; CHECK-NEXT: s_setpc_b64 s[30:31] 935 %voffset.add = add i32 %voffset, 60 936 call void @llvm.amdgcn.raw.ptr.buffer.store.v2p3(<2 x ptr addrspace(3)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 937 ret void 938} 939 940define void @buffer_store_v3p3__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(3)> %data, i32 %voffset) #0 { 941; 
VERDE-LABEL: buffer_store_v3p3__voffset_add: 942; VERDE: ; %bb.0: 943; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 944; VERDE-NEXT: buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60 945; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 946; VERDE-NEXT: s_setpc_b64 s[30:31] 947; 948; CHECK-LABEL: buffer_store_v3p3__voffset_add: 949; CHECK: ; %bb.0: 950; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 951; CHECK-NEXT: buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60 952; CHECK-NEXT: s_waitcnt vmcnt(0) 953; CHECK-NEXT: s_setpc_b64 s[30:31] 954 %voffset.add = add i32 %voffset, 60 955 call void @llvm.amdgcn.raw.ptr.buffer.store.v3p3(<3 x ptr addrspace(3)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 956 ret void 957} 958 959define void @buffer_store_v4p3__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(3)> %data, i32 %voffset) #0 { 960; VERDE-LABEL: buffer_store_v4p3__voffset_add: 961; VERDE: ; %bb.0: 962; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 963; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60 964; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 965; VERDE-NEXT: s_setpc_b64 s[30:31] 966; 967; CHECK-LABEL: buffer_store_v4p3__voffset_add: 968; CHECK: ; %bb.0: 969; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 970; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60 971; CHECK-NEXT: s_waitcnt vmcnt(0) 972; CHECK-NEXT: s_setpc_b64 s[30:31] 973 %voffset.add = add i32 %voffset, 60 974 call void @llvm.amdgcn.raw.ptr.buffer.store.v4p3(<4 x ptr addrspace(3)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 975 ret void 976} 977 978define void @buffer_store_p5__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(5) %data, i32 %voffset) #0 { 979; VERDE-LABEL: buffer_store_p5__voffset_add: 980; VERDE: ; %bb.0: 981; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 982; VERDE-NEXT: buffer_store_dword v0, v1, s[16:19], 0 offen offset:60 983; 
VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 984; VERDE-NEXT: s_setpc_b64 s[30:31] 985; 986; CHECK-LABEL: buffer_store_p5__voffset_add: 987; CHECK: ; %bb.0: 988; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 989; CHECK-NEXT: buffer_store_dword v0, v1, s[16:19], 0 offen offset:60 990; CHECK-NEXT: s_waitcnt vmcnt(0) 991; CHECK-NEXT: s_setpc_b64 s[30:31] 992 %voffset.add = add i32 %voffset, 60 993 call void @llvm.amdgcn.raw.ptr.buffer.store.p5(ptr addrspace(5) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 994 ret void 995} 996 997define void @buffer_store_v2p5__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(5)> %data, i32 %voffset) #0 { 998; VERDE-LABEL: buffer_store_v2p5__voffset_add: 999; VERDE: ; %bb.0: 1000; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1001; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60 1002; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1003; VERDE-NEXT: s_setpc_b64 s[30:31] 1004; 1005; CHECK-LABEL: buffer_store_v2p5__voffset_add: 1006; CHECK: ; %bb.0: 1007; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1008; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60 1009; CHECK-NEXT: s_waitcnt vmcnt(0) 1010; CHECK-NEXT: s_setpc_b64 s[30:31] 1011 %voffset.add = add i32 %voffset, 60 1012 call void @llvm.amdgcn.raw.ptr.buffer.store.v2p5(<2 x ptr addrspace(5)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1013 ret void 1014} 1015 1016define void @buffer_store_v3p5__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(5)> %data, i32 %voffset) #0 { 1017; VERDE-LABEL: buffer_store_v3p5__voffset_add: 1018; VERDE: ; %bb.0: 1019; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1020; VERDE-NEXT: buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60 1021; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1022; VERDE-NEXT: s_setpc_b64 s[30:31] 1023; 1024; CHECK-LABEL: buffer_store_v3p5__voffset_add: 1025; CHECK: ; %bb.0: 1026; CHECK-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 1027; CHECK-NEXT: buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60 1028; CHECK-NEXT: s_waitcnt vmcnt(0) 1029; CHECK-NEXT: s_setpc_b64 s[30:31] 1030 %voffset.add = add i32 %voffset, 60 1031 call void @llvm.amdgcn.raw.ptr.buffer.store.v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1032 ret void 1033} 1034 1035define void @buffer_store_v4p5__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(5)> %data, i32 %voffset) #0 { 1036; VERDE-LABEL: buffer_store_v4p5__voffset_add: 1037; VERDE: ; %bb.0: 1038; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1039; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60 1040; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1041; VERDE-NEXT: s_setpc_b64 s[30:31] 1042; 1043; CHECK-LABEL: buffer_store_v4p5__voffset_add: 1044; CHECK: ; %bb.0: 1045; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1046; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60 1047; CHECK-NEXT: s_waitcnt vmcnt(0) 1048; CHECK-NEXT: s_setpc_b64 s[30:31] 1049 %voffset.add = add i32 %voffset, 60 1050 call void @llvm.amdgcn.raw.ptr.buffer.store.v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1051 ret void 1052} 1053 1054define void @buffer_store_p6__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(6) %data, i32 %voffset) #0 { 1055; VERDE-LABEL: buffer_store_p6__voffset_add: 1056; VERDE: ; %bb.0: 1057; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1058; VERDE-NEXT: buffer_store_dword v0, v1, s[16:19], 0 offen offset:60 1059; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1060; VERDE-NEXT: s_setpc_b64 s[30:31] 1061; 1062; CHECK-LABEL: buffer_store_p6__voffset_add: 1063; CHECK: ; %bb.0: 1064; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1065; CHECK-NEXT: buffer_store_dword v0, v1, s[16:19], 0 offen offset:60 1066; CHECK-NEXT: s_waitcnt vmcnt(0) 1067; CHECK-NEXT: s_setpc_b64 
s[30:31] 1068 %voffset.add = add i32 %voffset, 60 1069 call void @llvm.amdgcn.raw.ptr.buffer.store.p6(ptr addrspace(6) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1070 ret void 1071} 1072 1073define void @buffer_store_v2p6__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(6)> %data, i32 %voffset) #0 { 1074; VERDE-LABEL: buffer_store_v2p6__voffset_add: 1075; VERDE: ; %bb.0: 1076; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1077; VERDE-NEXT: buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60 1078; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1079; VERDE-NEXT: s_setpc_b64 s[30:31] 1080; 1081; CHECK-LABEL: buffer_store_v2p6__voffset_add: 1082; CHECK: ; %bb.0: 1083; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1084; CHECK-NEXT: buffer_store_dwordx2 v[0:1], v2, s[16:19], 0 offen offset:60 1085; CHECK-NEXT: s_waitcnt vmcnt(0) 1086; CHECK-NEXT: s_setpc_b64 s[30:31] 1087 %voffset.add = add i32 %voffset, 60 1088 call void @llvm.amdgcn.raw.ptr.buffer.store.v2p6(<2 x ptr addrspace(6)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1089 ret void 1090} 1091 1092define void @buffer_store_v3p6__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(6)> %data, i32 %voffset) #0 { 1093; VERDE-LABEL: buffer_store_v3p6__voffset_add: 1094; VERDE: ; %bb.0: 1095; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1096; VERDE-NEXT: buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60 1097; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1098; VERDE-NEXT: s_setpc_b64 s[30:31] 1099; 1100; CHECK-LABEL: buffer_store_v3p6__voffset_add: 1101; CHECK: ; %bb.0: 1102; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1103; CHECK-NEXT: buffer_store_dwordx3 v[0:2], v3, s[16:19], 0 offen offset:60 1104; CHECK-NEXT: s_waitcnt vmcnt(0) 1105; CHECK-NEXT: s_setpc_b64 s[30:31] 1106 %voffset.add = add i32 %voffset, 60 1107 call void @llvm.amdgcn.raw.ptr.buffer.store.v3p6(<3 x ptr addrspace(6)> %data, ptr addrspace(8) %rsrc, i32 
%voffset.add, i32 0, i32 0) 1108 ret void 1109} 1110 1111define void @buffer_store_v4p6__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(6)> %data, i32 %voffset) #0 { 1112; VERDE-LABEL: buffer_store_v4p6__voffset_add: 1113; VERDE: ; %bb.0: 1114; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1115; VERDE-NEXT: buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60 1116; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1117; VERDE-NEXT: s_setpc_b64 s[30:31] 1118; 1119; CHECK-LABEL: buffer_store_v4p6__voffset_add: 1120; CHECK: ; %bb.0: 1121; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1122; CHECK-NEXT: buffer_store_dwordx4 v[0:3], v4, s[16:19], 0 offen offset:60 1123; CHECK-NEXT: s_waitcnt vmcnt(0) 1124; CHECK-NEXT: s_setpc_b64 s[30:31] 1125 %voffset.add = add i32 %voffset, 60 1126 call void @llvm.amdgcn.raw.ptr.buffer.store.v4p6(<4 x ptr addrspace(6)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0) 1127 ret void 1128} 1129 1130declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32) #0 1131declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32) #0 1132declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) #0 1133declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32) #0 1134declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32>, ptr addrspace(8), i32, i32, i32) #0 1135declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32>, ptr addrspace(8), i32, i32, i32) #0 1136declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #1 1137declare void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8, ptr addrspace(8), i32, i32, i32) #0 1138declare void @llvm.amdgcn.raw.ptr.buffer.store.f16(half, ptr addrspace(8), i32, i32, i32) #0 1139declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32) #0 1140declare void 
@llvm.amdgcn.raw.ptr.buffer.store.v4f16(<4 x half>, ptr addrspace(8), i32, i32, i32) #0 1141declare void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16, ptr addrspace(8), i32, i32, i32) #0 1142declare void @llvm.amdgcn.raw.ptr.buffer.store.v2i16(<2 x i16>, ptr addrspace(8), i32, i32, i32) #0 1143declare void @llvm.amdgcn.raw.ptr.buffer.store.v4i16(<4 x i16>, ptr addrspace(8), i32, i32, i32) #0 1144 1145attributes #0 = { nounwind } 1146attributes #1 = { nounwind readonly } 1147