; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s

; Checks GlobalISel instruction selection of the v2f16 buffer atomic fadd
; intrinsics when the returned value is unused (the "no_rtn" cases). Each
; function exercises one addressing form (offset, offen, idxen, bothen): the
; first four functions pass the buffer resource as <4 x i32>, the last four
; pass it as ptr addrspace(8). The gfx90a and gfx940 runs share one check
; prefix. Regenerate the assertions with utils/update_mir_test_checks.py
; instead of editing them by hand.

; Raw buffer intrinsic, constant offset 4095 (selected as the immediate offset).
define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX908-NEXT: S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
  ; GFX90A_GFX940-NEXT: {{ $}}
  ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT: S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
  ret void
}

; Raw buffer intrinsic, offset supplied in a VGPR.
define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX908-NEXT: S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX90A_GFX940-NEXT: {{ $}}
  ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT: S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Struct buffer intrinsic, index in a VGPR and a zero offset.
define amdgpu_ps void @buffer_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX908-NEXT: S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX90A_GFX940-NEXT: {{ $}}
  ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT: S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
  ret void
}

; Struct buffer intrinsic, index and offset both in VGPRs; the trailing i32 2
; argument shows up as the last immediate operand of the selected instruction.
define amdgpu_ps void @buffer_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX908-NEXT: S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
  ; GFX90A_GFX940-NEXT: {{ $}}
  ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT: S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
  ret void
}

; The remaining functions repeat the four cases above with the resource passed
; as ptr addrspace(8); the expected memory operand then names %ir.rsrc.
define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX908-NEXT: S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
  ; GFX90A_GFX940-NEXT: {{ $}}
  ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT: S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX908-NEXT: S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX90A_GFX940-NEXT: {{ $}}
  ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT: S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX908-NEXT: S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX90A_GFX940-NEXT: {{ $}}
  ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT: S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
  ; GFX908-NEXT: {{ $}}
  ; GFX908-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX908-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX908-NEXT: S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
  ; GFX90A_GFX940-NEXT: {{ $}}
  ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT: S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
  ret void
}

declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)

declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg)