; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx940 < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s

; Each function below calls one of the llvm.amdgcn.raw.ptr.buffer.atomic.fadd
; intrinsics (f32 or v2f16), returns the call's result, and is compiled for the
; three -mcpu values named in the RUN lines above. The expected-assembly lines
; are generated by the script named in the first line; regenerate them with
; that script instead of editing them by hand.

; f32 add, voffset and soffset both taken from arguments, trailing immediate 24.
; For that immediate the expected instruction carries `glc scc` (gfx90a),
; `sc0 sc1` (gfx940) and `th:TH_ATOMIC_RETURN scope:SCOPE_SYS` (gfx12).
define float @raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_atomic_add_f32 v0, v1, s[16:19], s20 offen glc scc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen th:TH_ATOMIC_RETURN scope:SCOPE_SYS
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %result = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 24)
  ret float %result
}

; f32 add with constant 0 passed as the voffset operand and trailing immediate 0.
; The expected instruction uses `off` in place of an address register.
define float @raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) #0 {
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_atomic_add_f32 v0, off, s[16:19], s20 glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_atomic_add_f32 v0, off, s[0:3], s16 sc0
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    buffer_atomic_add_f32 v0, off, s[0:3], s16 th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %result = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0)
  ret float %result
}

; <2 x half> add, voffset and soffset both taken from arguments, trailing
; immediate 0. The expected instruction is buffer_atomic_pk_add_f16 with `offen`.
define <2 x half> @raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[16:19], s20 offen glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s16 offen sc0
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    buffer_atomic_pk_add_f16 v0, v1, s[0:3], s16 offen th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret <2 x half> %result
}

; <2 x half> add with constant 92 passed as the voffset operand and trailing
; immediate 0. The expected instruction uses `off` together with `offset:92`.
; NOTE(review): the name says "0_voffset", yet the call passes 92 and the
; %voffset parameter is never used - confirm this is intentional before
; renaming or dropping anything (both would change the labels/signature).
define <2 x half> @raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[16:19], s20 offset:92 glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s16 offset:92 sc0
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: raw_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__sgpr_rsrc__0_voffset__sgpr_soffset:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    buffer_atomic_pk_add_f16 v0, off, s[0:3], s16 offset:92 th:TH_ATOMIC_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 92, i32 %soffset, i32 0)
  ret <2 x half> %result
}

; f32 add, voffset and soffset both taken from arguments, trailing immediate 2.
; For that immediate the expected instruction carries `glc slc` (gfx90a),
; `sc0 nt` (gfx940) and `th:TH_ATOMIC_NT_RETURN` (gfx12).
define float @raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) #0 {
; GFX90A-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    buffer_atomic_add_f32 v0, v1, s[16:19], s20 offen glc slc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen sc0 nt
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
;
; GFX12-LABEL: raw_ptr_buffer_atomic_add_f32_rtn__vgpr_val__sgpr_rsrc__vgpr_voffset__sgpr_soffset_slc:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT:    s_wait_expcnt 0x0
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    s_wait_bvhcnt 0x0
; GFX12-NEXT:    s_wait_kmcnt 0x0
; GFX12-NEXT:    buffer_atomic_add_f32 v0, v1, s[0:3], s16 offen th:TH_ATOMIC_NT_RETURN
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    s_setpc_b64 s[30:31]
  %result = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 2)
  ret float %result
}

; Intrinsic prototypes used above; the final i32 operand must be an immediate.
declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg)
declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)

; Attribute group shared by every function in this file.
attributes #0 = { nounwind }