xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/buffer-atomic-fadd.v2f16-no-rtn.ll (revision e7900e695e7dfb36be8651d914a31f42a5d6c634)
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s

; Raw buffer atomic fadd on <2 x half> whose result is never used. The offset
; is the constant 4095 and %soffset arrives in an SGPR; both check prefixes
; expect BUFFER_ATOMIC_PK_ADD_F16_OFFSET carrying 4095 as its immediate offset.
define amdgpu_ps void @buffer_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
  ; GFX908-NEXT: {{  $}}
  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX908-NEXT:   S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offset_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
  ; GFX90A_GFX940-NEXT: {{  $}}
  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
  ret void
}

; Raw buffer atomic fadd on <2 x half> whose result is never used. %voffset is
; a non-inreg argument copied from $vgpr1, so both check prefixes expect the
; BUFFER_ATOMIC_PK_ADD_F16_OFFEN form with a zero immediate offset.
define amdgpu_ps void @buffer_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX908-NEXT: {{  $}}
  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX908-NEXT:   S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_offen_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX90A_GFX940-NEXT: {{  $}}
  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Struct buffer atomic fadd on <2 x half> whose result is never used. %vindex
; is copied from $vgpr1 and the voffset argument is the constant 0, so both
; check prefixes expect the BUFFER_ATOMIC_PK_ADD_F16_IDXEN form.
define amdgpu_ps void @buffer_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX908-NEXT: {{  $}}
  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX908-NEXT:   S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_idxen_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX90A_GFX940-NEXT: {{  $}}
  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
  ret void
}

; Struct buffer atomic fadd on <2 x half> whose result is never used, with both
; %vindex and %voffset in VGPRs. The two are combined by a REG_SEQUENCE into a
; 64-bit register (vreg_64 under the GFX908 prefix, vreg_64_align2 under the
; GFX90A_GFX940 prefix) feeding BUFFER_ATOMIC_PK_ADD_F16_BOTHEN. The trailing
; immarg 2 of the call shows up as the last immediate operand.
define amdgpu_ps void @buffer_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
  ; GFX908-NEXT: {{  $}}
  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX908-NEXT:   S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_v2f16_bothen_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
  ; GFX90A_GFX940-NEXT: {{  $}}
  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>), align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
  ret void
}

; Raw buffer atomic fadd on <2 x half> through a ptr addrspace(8) resource,
; result unused, constant offset 4095. Both check prefixes expect
; BUFFER_ATOMIC_PK_ADD_F16_OFFSET, and the memory operand names the IR value
; it acts on (%ir.rsrc).
define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offset_no_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
  ; GFX908-NEXT: {{  $}}
  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX908-NEXT:   S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offset_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0
  ; GFX90A_GFX940-NEXT: {{  $}}
  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 4095, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 4095, i32 %soffset, i32 0)
  ret void
}

; Raw buffer atomic fadd on <2 x half> through a ptr addrspace(8) resource,
; result unused, with %voffset copied from $vgpr1. Both check prefixes expect
; BUFFER_ATOMIC_PK_ADD_F16_OFFEN with a memory operand on %ir.rsrc.
define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_offen_no_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX908-NEXT: {{  $}}
  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX908-NEXT:   S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_offen_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX90A_GFX940-NEXT: {{  $}}
  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; Struct buffer atomic fadd on <2 x half> through a ptr addrspace(8) resource,
; result unused, with %vindex copied from $vgpr1 and a constant-0 voffset. Both
; check prefixes expect BUFFER_ATOMIC_PK_ADD_F16_IDXEN with a memory operand on
; %ir.rsrc.
define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX908-NEXT: {{  $}}
  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX908-NEXT:   S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_idxen_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1
  ; GFX90A_GFX940-NEXT: {{  $}}
  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0)
  ret void
}

; Struct buffer atomic fadd on <2 x half> through a ptr addrspace(8) resource,
; result unused, with both %vindex and %voffset in VGPRs. The two are combined
; by a REG_SEQUENCE into a 64-bit register (vreg_64 under the GFX908 prefix,
; vreg_64_align2 under the GFX90A_GFX940 prefix) feeding
; BUFFER_ATOMIC_PK_ADD_F16_BOTHEN. The trailing immarg 2 of the call shows up
; as the last immediate operand; the memory operand is on %ir.rsrc.
define amdgpu_ps void @buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn(<2 x half> %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) {
  ; GFX908-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn
  ; GFX908: bb.1 (%ir-block.0):
  ; GFX908-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
  ; GFX908-NEXT: {{  $}}
  ; GFX908-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX908-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX908-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX908-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX908-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX908-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX908-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX908-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX908-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX908-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX908-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX908-NEXT:   S_ENDPGM 0
  ;
  ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_v2f16_bothen_no_rtn
  ; GFX90A_GFX940: bb.1 (%ir-block.0):
  ; GFX90A_GFX940-NEXT:   liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2
  ; GFX90A_GFX940-NEXT: {{  $}}
  ; GFX90A_GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0
  ; GFX90A_GFX940-NEXT:   [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1
  ; GFX90A_GFX940-NEXT:   [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX90A_GFX940-NEXT:   [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX90A_GFX940-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX90A_GFX940-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX90A_GFX940-NEXT:   [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3
  ; GFX90A_GFX940-NEXT:   [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1
  ; GFX90A_GFX940-NEXT:   BUFFER_ATOMIC_PK_ADD_F16_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (<2 x s16>) on %ir.rsrc, align 1, addrspace 8)
  ; GFX90A_GFX940-NEXT:   S_ENDPGM 0
  %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2)
  ret void
}

; Declarations of the intrinsics called above. The first pair takes the buffer
; resource as <4 x i32>; the second pair takes it as ptr addrspace(8). In every
; form the final i32 operand is an immarg.
declare <2 x half> @llvm.amdgcn.raw.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32 immarg)
declare <2 x half> @llvm.amdgcn.struct.buffer.atomic.fadd.v2f16(<2 x half>, <4 x i32>, i32, i32, i32, i32 immarg)

declare <2 x half> @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32 immarg)
declare <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half>, ptr addrspace(8), i32, i32, i32, i32 immarg)