148968c47SPetar Avramovic; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2*9e9907f1SFangrui Song; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s 3*9e9907f1SFangrui Song; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s 448968c47SPetar Avramovic 548968c47SPetar Avramovicdefine amdgpu_ps void @buffer_atomic_fadd_f64_offset_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { 648968c47SPetar Avramovic ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offset_no_rtn 748968c47SPetar Avramovic ; GFX90A_GFX940: bb.1 (%ir-block.0): 848968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 948968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: {{ $}} 1048968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 1148968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 1248968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 1348968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 1448968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 1548968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 1648968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 1748968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 1848968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 19f0415f2aSKrzysztof Drewniak ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) 2048968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: S_ENDPGM 0 2148968c47SPetar Avramovic %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) 2248968c47SPetar Avramovic ret void 2348968c47SPetar Avramovic} 2448968c47SPetar Avramovic 2548968c47SPetar Avramovicdefine amdgpu_ps void @buffer_atomic_fadd_f64_offen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 2648968c47SPetar Avramovic ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offen_no_rtn 2748968c47SPetar Avramovic ; GFX90A_GFX940: bb.1 (%ir-block.0): 2848968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 2948968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: {{ $}} 3048968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 3148968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 3248968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 3348968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 3448968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 3548968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 3648968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 3748968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 3848968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 3948968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 40f0415f2aSKrzysztof Drewniak ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) 4148968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: S_ENDPGM 0 4248968c47SPetar Avramovic %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 4348968c47SPetar Avramovic ret void 4448968c47SPetar Avramovic} 4548968c47SPetar Avramovic 4648968c47SPetar Avramovicdefine amdgpu_ps void @buffer_atomic_fadd_f64_idxen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) { 4748968c47SPetar Avramovic ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_idxen_no_rtn 4848968c47SPetar Avramovic ; GFX90A_GFX940: bb.1 (%ir-block.0): 4948968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 5048968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: {{ $}} 5148968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 5248968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 5348968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 5448968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 5548968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 5648968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 5748968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 5848968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 5948968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 6048968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 61f0415f2aSKrzysztof Drewniak ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) 6248968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: S_ENDPGM 0 6348968c47SPetar Avramovic %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) 6448968c47SPetar Avramovic ret void 6548968c47SPetar Avramovic} 6648968c47SPetar Avramovic 6748968c47SPetar Avramovicdefine amdgpu_ps void @buffer_atomic_fadd_f64_bothen_no_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 6848968c47SPetar Avramovic ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_bothen_no_rtn 6948968c47SPetar Avramovic ; GFX90A_GFX940: bb.1 (%ir-block.0): 7048968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 7148968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: {{ $}} 7248968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 7348968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 7448968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 7548968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 7648968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 7748968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 7848968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 7948968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 8048968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 8148968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 8248968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 8348968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 84f0415f2aSKrzysztof Drewniak ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) 8548968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: S_ENDPGM 0 8648968c47SPetar Avramovic %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) 8748968c47SPetar Avramovic ret void 8848968c47SPetar Avramovic} 8948968c47SPetar Avramovic 9048968c47SPetar Avramovicdefine amdgpu_ps double @buffer_atomic_fadd_f64_offset_rtn(double %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { 9148968c47SPetar Avramovic ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offset_rtn 9248968c47SPetar Avramovic ; GFX90A_GFX940: bb.1 (%ir-block.0): 9348968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 9448968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: {{ $}} 9548968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 9648968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 9748968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 9848968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 9948968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 10048968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 10148968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 10248968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 10348968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 104f0415f2aSKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) 10548968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 10648968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 10748968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec 10848968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 10948968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec 11048968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 11148968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 11248968c47SPetar Avramovic %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) 11348968c47SPetar Avramovic ret double %ret 11448968c47SPetar Avramovic} 11548968c47SPetar Avramovic 11648968c47SPetar Avramovicdefine amdgpu_ps double @buffer_atomic_fadd_f64_offen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 11748968c47SPetar Avramovic ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_offen_rtn 11848968c47SPetar Avramovic ; GFX90A_GFX940: bb.1 (%ir-block.0): 11948968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 12048968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: {{ $}} 12148968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 12248968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 12348968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 12448968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 12548968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 12648968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 12748968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 12848968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 12948968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 13048968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 131f0415f2aSKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) 13248968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 13348968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 13448968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec 13548968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 13648968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec 13748968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 13848968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 13948968c47SPetar Avramovic %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 14048968c47SPetar Avramovic ret double %ret 14148968c47SPetar Avramovic} 14248968c47SPetar Avramovic 14348968c47SPetar Avramovicdefine amdgpu_ps double @buffer_atomic_fadd_f64_idxen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) { 14448968c47SPetar Avramovic ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_idxen_rtn 14548968c47SPetar Avramovic ; GFX90A_GFX940: bb.1 (%ir-block.0): 14648968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 14748968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: {{ $}} 14848968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 14948968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 15048968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 15148968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 15248968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 15348968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 15448968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 15548968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 15648968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 15748968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 158f0415f2aSKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) 15948968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 16048968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 16148968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec 16248968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 16348968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec 16448968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 16548968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 16648968c47SPetar Avramovic %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) 16748968c47SPetar Avramovic ret double %ret 16848968c47SPetar Avramovic} 16948968c47SPetar Avramovic 17048968c47SPetar Avramovicdefine amdgpu_ps double @buffer_atomic_fadd_f64_bothen_rtn(double %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 17148968c47SPetar Avramovic ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f64_bothen_rtn 17248968c47SPetar Avramovic ; GFX90A_GFX940: bb.1 (%ir-block.0): 17348968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 17448968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: {{ $}} 17548968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 17648968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 17748968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 17848968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 17948968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 18048968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 18148968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 18248968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 18348968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 18448968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 18548968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 18648968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 187f0415f2aSKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8) 18848968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 18948968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 19048968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec 19148968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 19248968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec 19348968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 19448968c47SPetar Avramovic ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 19548968c47SPetar Avramovic %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) 19648968c47SPetar Avramovic ret double %ret 19748968c47SPetar Avramovic} 19848968c47SPetar Avramovic 199faa2c678SKrzysztof Drewniakdefine amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offset_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) { 200faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_no_rtn 201faa2c678SKrzysztof Drewniak ; GFX90A_GFX940: bb.1 (%ir-block.0): 202faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 203faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: {{ $}} 204faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 205faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 206faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 207faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 208faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 209faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 210faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 211faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 212faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 213ab379378SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFSET [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) 214faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: S_ENDPGM 0 215faa2c678SKrzysztof Drewniak %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) 216faa2c678SKrzysztof Drewniak ret void 217faa2c678SKrzysztof Drewniak} 218faa2c678SKrzysztof Drewniak 219faa2c678SKrzysztof Drewniakdefine amdgpu_ps void @buffer_ptr_atomic_fadd_f64_offen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 220faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_no_rtn 221faa2c678SKrzysztof Drewniak ; GFX90A_GFX940: bb.1 (%ir-block.0): 222faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 223faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: {{ $}} 224faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 225faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 226faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 227faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 228faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 229faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 230faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 231faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 232faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 233faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 234ab379378SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_OFFEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) 235faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: S_ENDPGM 0 236faa2c678SKrzysztof Drewniak %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 237faa2c678SKrzysztof Drewniak ret void 238faa2c678SKrzysztof Drewniak} 239faa2c678SKrzysztof Drewniak 240faa2c678SKrzysztof Drewniakdefine amdgpu_ps void @buffer_ptr_atomic_fadd_f64_idxen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) { 241faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_no_rtn 242faa2c678SKrzysztof Drewniak ; GFX90A_GFX940: bb.1 (%ir-block.0): 243faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 244faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: {{ $}} 245faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 246faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 247faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 248faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 249faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 250faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 251faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 252faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 253faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 254faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 255ab379378SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_IDXEN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) 256faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: S_ENDPGM 0 257faa2c678SKrzysztof Drewniak %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) 258faa2c678SKrzysztof Drewniak ret void 259faa2c678SKrzysztof Drewniak} 260faa2c678SKrzysztof Drewniak 261faa2c678SKrzysztof Drewniakdefine amdgpu_ps void @buffer_ptr_atomic_fadd_f64_bothen_no_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 262faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_no_rtn 263faa2c678SKrzysztof Drewniak ; GFX90A_GFX940: bb.1 (%ir-block.0): 264faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 265faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: {{ $}} 266faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 267faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 268faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 269faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 270faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 271faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 272faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 273faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 274faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 275faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 276faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 277faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 278ab379378SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F64_BOTHEN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) 279faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: S_ENDPGM 0 280faa2c678SKrzysztof Drewniak %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) 281faa2c678SKrzysztof Drewniak ret void 282faa2c678SKrzysztof Drewniak} 283faa2c678SKrzysztof Drewniak 284faa2c678SKrzysztof Drewniakdefine amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offset_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) { 285faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offset_rtn 286faa2c678SKrzysztof Drewniak ; GFX90A_GFX940: bb.1 (%ir-block.0): 287faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 288faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: {{ $}} 289faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 290faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 291faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 292faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 293faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 294faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 295faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 296faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 297faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 298ab379378SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFSET_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], [[COPY6]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) 299faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub0 300faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFSET_RTN]].sub1 301faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec 302faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 303faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec 304faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 305faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 306faa2c678SKrzysztof Drewniak %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) 307faa2c678SKrzysztof Drewniak ret double %ret 308faa2c678SKrzysztof Drewniak} 309faa2c678SKrzysztof Drewniak 310faa2c678SKrzysztof Drewniakdefine amdgpu_ps double @buffer_ptr_atomic_fadd_f64_offen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 311faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_offen_rtn 312faa2c678SKrzysztof Drewniak ; GFX90A_GFX940: bb.1 (%ir-block.0): 313faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 314faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: {{ $}} 315faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 316faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 317faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 318faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 319faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 320faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 321faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 322faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 323faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 324faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 325ab379378SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_OFFEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) 326faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub0 327faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_OFFEN_RTN]].sub1 328faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec 329faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 330faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec 331faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 332faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 333faa2c678SKrzysztof Drewniak %ret = call double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 334faa2c678SKrzysztof Drewniak ret double %ret 335faa2c678SKrzysztof Drewniak} 336faa2c678SKrzysztof Drewniak 337faa2c678SKrzysztof Drewniakdefine amdgpu_ps double @buffer_ptr_atomic_fadd_f64_idxen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) { 338faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_idxen_rtn 339faa2c678SKrzysztof Drewniak ; GFX90A_GFX940: bb.1 (%ir-block.0): 340faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 341faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: {{ $}} 342faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 343faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 344faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 345faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 346faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 347faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 348faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 349faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 350faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 351faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 352ab379378SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_IDXEN_RTN [[REG_SEQUENCE]], [[COPY6]], [[REG_SEQUENCE1]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) 353faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub0 354faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_IDXEN_RTN]].sub1 355faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY8]], implicit $exec 356faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 357faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec 358faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 359faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 360faa2c678SKrzysztof Drewniak %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) 361faa2c678SKrzysztof Drewniak ret double %ret 362faa2c678SKrzysztof Drewniak} 363faa2c678SKrzysztof Drewniak 364faa2c678SKrzysztof Drewniakdefine amdgpu_ps double @buffer_ptr_atomic_fadd_f64_bothen_rtn(double %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 365faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f64_bothen_rtn 366faa2c678SKrzysztof Drewniak ; GFX90A_GFX940: bb.1 (%ir-block.0): 367faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3 368faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: {{ $}} 369faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 370faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 371faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 372faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr0 373faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr1 374faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 375faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 376faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 377faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 378faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr4 379faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 380faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY6]], %subreg.sub0, [[COPY7]], %subreg.sub1 381ab379378SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN:%[0-9]+]]:vreg_64_align2 = BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], [[COPY8]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) 382faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub0 383faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_ADD_F64_BOTHEN_RTN]].sub1 384faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec 385faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 386faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY10]], implicit $exec 387faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 388faa2c678SKrzysztof Drewniak ; GFX90A_GFX940-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 389faa2c678SKrzysztof Drewniak %ret = call double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) 390faa2c678SKrzysztof Drewniak ret double %ret 391faa2c678SKrzysztof Drewniak} 392faa2c678SKrzysztof Drewniak 39348968c47SPetar Avramovicdeclare double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32 immarg) 39448968c47SPetar Avramovicdeclare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg) 395faa2c678SKrzysztof Drewniak 396faa2c678SKrzysztof Drewniakdeclare double @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32 immarg) 397faa2c678SKrzysztof Drewniakdeclare double @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f64(double, ptr addrspace(8), i32, i32, i32, i32 immarg) 398