; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx908 < %s | FileCheck -check-prefix=GFX908 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s

; Instruction-selection checks for llvm.amdgcn.raw.ptr.buffer.atomic.fadd
; (f32 and v2f16 overloads) on four targets: gfx908, gfx90a, gfx940, gfx1200.
; Every function below returns void and never uses the intrinsic's result
; (%ret), which is what the "noret" in the function names refers to.
;
; The check lines are generated; after changing any IR in this file,
; regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand.

; f32 add, voffset passed as a plain (non-inreg) argument, soffset inreg,
; trailing immarg operand = 24.
; Expected: an "offen" buffer_atomic_add_f32 on every target. The immediate
; adds no modifier on gfx908, "scc" on gfx90a, "sc1" on gfx940 and
; "scope:SCOPE_SYS" on gfx1200.
define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
; GFX908-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    buffer_atomic_add_f32 v0, v1, s[16:19], s20 offen
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_atomic_add_f32 v0, v1, s[16:19], s20 offen scc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen scope:SCOPE_SYS
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 24)
  ret void
}

; f32 add with a constant voffset of 0 and trailing immarg operand = 0.
; Expected: the address operand is "off" (no "offen") and no extra
; modifiers appear on any target.
define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) #0 {
; GFX908-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    buffer_atomic_add_f32 v0, off, s[16:19], s20
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_atomic_add_f32 v0, off, s[16:19], s20
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_atomic_add_f32 v0, off, s[0:3], s16
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    buffer_atomic_add_f32 v0, off, s[0:3], s16
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
  ret void
}

; <2 x half> add, voffset passed as a plain (non-inreg) argument, trailing
; immarg operand = 0.
; Expected: an "offen" buffer_atomic_pk_add_f16 on every target.
define void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
; GFX908-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[16:19], s20 offen
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[16:19], s20 offen
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s16 offen
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s16 offen
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; <2 x half> add with a constant voffset of 92 and trailing immarg operand = 0.
; Expected: the constant is emitted as "offset:92" with an "off" address
; operand on every target.
; Note: despite the "0_voffset" in the function name the constant used is 92,
; and the %voffset argument is declared but not referenced by the body.
define void @raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
; GFX908-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[16:19], s20 offset:92
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[16:19], s20 offset:92
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s16 offset:92
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s16 offset:92
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 92, i32 %soffset, i32 0)
  ret void
}

; f32 add, voffset passed as a plain (non-inreg) argument, trailing immarg
; operand = 2.
; Expected: the immediate is printed as "slc" on gfx908 and gfx90a, "nt" on
; gfx940 and "th:TH_ATOMIC_NT" on gfx1200.
define void @raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
; GFX908-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX908:       ; %bb.0:
; GFX908-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX908-NEXT:    buffer_atomic_add_f32 v0, v1, s[16:19], s20 offen slc
; GFX908-NEXT:    s_waitcnt vmcnt(0)
; GFX908-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_atomic_add_f32 v0, v1, s[16:19], s20 offen slc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen nt
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_noret__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen th:TH_ATOMIC_NT
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 2)
  ret void
}

; Intrinsic overloads exercised above. Operand order as used by the calls:
; value, buffer resource, voffset, soffset, then a trailing immarg i32.
declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)

attributes #0 = { nounwind }