; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX940 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX11 %s

define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %data) {
  ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
  ; GFX940-NEXT:   S_ENDPGM 0
  ;
  ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
  ; GFX11: bb.1 (%ir-block.0):
  ; GFX11-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
  ; GFX11-NEXT: {{  $}}
  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX11-NEXT:   FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
  ; GFX11-NEXT:   S_ENDPGM 0
  %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data)
  ret void
}

define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data) {
  ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
  ; GFX940-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_intrinsic
  ; GFX11: bb.1 (%ir-block.0):
  ; GFX11-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
  ; GFX11-NEXT: {{  $}}
  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX11-NEXT:   [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
  ; GFX11-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
  ; GFX11-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data)
  ret float %ret
}

define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_atomicrmw(ptr %ptr, float %data) {
  ; GFX940-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
  ; GFX940-NEXT:   S_ENDPGM 0
  ;
  ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_atomicrmw
  ; GFX11: bb.1 (%ir-block.0):
  ; GFX11-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
  ; GFX11-NEXT: {{  $}}
  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX11-NEXT:   FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
  ; GFX11-NEXT:   S_ENDPGM 0
  %ret = atomicrmw fadd ptr %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
  ret void
}

define amdgpu_ps float @flat_atomic_fadd_f32_rtn_atomicrmw(ptr %ptr, float %data) {
  ; GFX940-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
  ; GFX940: bb.1 (%ir-block.0):
  ; GFX940-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
  ; GFX940-NEXT: {{  $}}
  ; GFX940-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX940-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX940-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX940-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX940-NEXT:   [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
  ; GFX940-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
  ; GFX940-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX11-LABEL: name: flat_atomic_fadd_f32_rtn_atomicrmw
  ; GFX11: bb.1 (%ir-block.0):
  ; GFX11-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
  ; GFX11-NEXT: {{  $}}
  ; GFX11-NEXT:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX11-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX11-NEXT:   [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX11-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX11-NEXT:   [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s32) on %ir.ptr)
  ; GFX11-NEXT:   $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
  ; GFX11-NEXT:   SI_RETURN_TO_EPILOG implicit $vgpr0
  %ret = atomicrmw fadd ptr %ptr, float %data syncscope("wavefront") monotonic, !amdgpu.no.fine.grained.memory !0
  ret float %ret
}

declare float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr, float)

!0 = !{}