1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s 3; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s 4; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX90A_GFX940 %s 5; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX908_GFX11 %s 6; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs -stop-after=instruction-select < %s | FileCheck -check-prefix=GFX12 %s 7 8define amdgpu_ps void @buffer_atomic_fadd_f32_offset_no_rtn(float %val, <4 x i32> inreg %rsrc, i32 inreg %soffset) { 9 ; GFX908_GFX11-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn 10 ; GFX908_GFX11: bb.1 (%ir-block.0): 11 ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 12 ; GFX908_GFX11-NEXT: {{ $}} 13 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 14 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 15 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 16 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 17 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 18 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 19 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 20 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) 21 ; GFX908_GFX11-NEXT: S_ENDPGM 0 22 ; 23 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn 24 ; GFX90A_GFX940: bb.1 (%ir-block.0): 25 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 26 ; GFX90A_GFX940-NEXT: {{ $}} 27 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 28 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 29 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 30 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 31 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 32 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 33 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 34 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) 35 ; GFX90A_GFX940-NEXT: S_ENDPGM 0 36 ; 37 ; GFX12-LABEL: name: buffer_atomic_fadd_f32_offset_no_rtn 38 ; GFX12: bb.1 (%ir-block.0): 39 ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 40 ; GFX12-NEXT: {{ $}} 41 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 42 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 43 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 44 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 45 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 46 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 47 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 48 ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) 49 ; GFX12-NEXT: S_ENDPGM 0 50 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 0, i32 %soffset, i32 0) 51 ret void 52} 53 54define amdgpu_ps void @buffer_atomic_fadd_f32_offen_no_rtn(float %val, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 55 ; GFX908_GFX11-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn 56 ; GFX908_GFX11: bb.1 (%ir-block.0): 57 ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 58 ; GFX908_GFX11-NEXT: {{ $}} 59 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 60 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 61 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 62 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 63 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 64 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 65 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 66 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 67 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) 68 ; GFX908_GFX11-NEXT: S_ENDPGM 0 69 ; 70 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn 71 ; GFX90A_GFX940: bb.1 (%ir-block.0): 72 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 73 ; GFX90A_GFX940-NEXT: {{ $}} 74 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 75 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 76 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 77 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 78 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 79 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 80 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 81 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 82 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) 83 ; GFX90A_GFX940-NEXT: S_ENDPGM 0 84 ; 85 ; GFX12-LABEL: name: buffer_atomic_fadd_f32_offen_no_rtn 86 ; GFX12: bb.1 (%ir-block.0): 87 ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 88 ; GFX12-NEXT: {{ $}} 89 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 90 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 91 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 92 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 93 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 94 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 95 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 96 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 97 ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) 98 ; GFX12-NEXT: S_ENDPGM 0 99 %ret = call float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0) 100 ret void 101} 102 103define amdgpu_ps void @buffer_atomic_fadd_f32_idxen_no_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 inreg %soffset) { 104 ; GFX908_GFX11-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn 105 ; GFX908_GFX11: bb.1 (%ir-block.0): 106 ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 107 ; GFX908_GFX11-NEXT: {{ $}} 108 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 109 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 110 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 111 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 112 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 113 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 114 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 115 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 116 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) 117 ; GFX908_GFX11-NEXT: S_ENDPGM 0 118 ; 119 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn 120 ; GFX90A_GFX940: bb.1 (%ir-block.0): 121 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 122 ; GFX90A_GFX940-NEXT: {{ $}} 123 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 124 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 125 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 126 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 127 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 128 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 129 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 130 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 131 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) 132 ; GFX90A_GFX940-NEXT: S_ENDPGM 0 133 ; 134 ; GFX12-LABEL: name: buffer_atomic_fadd_f32_idxen_no_rtn 135 ; GFX12: bb.1 (%ir-block.0): 136 ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 137 ; GFX12-NEXT: {{ $}} 138 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 139 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 140 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 141 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 142 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 143 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 144 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 145 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 146 ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) 147 ; GFX12-NEXT: S_ENDPGM 0 148 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) 149 ret void 150} 151 152define amdgpu_ps void @buffer_atomic_fadd_f32_bothen_no_rtn(float %val, <4 x i32> inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 153 ; GFX908_GFX11-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn 154 ; GFX908_GFX11: bb.1 (%ir-block.0): 155 ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 156 ; GFX908_GFX11-NEXT: {{ $}} 157 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 158 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 159 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 160 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 161 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 162 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 163 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 164 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 165 ; GFX908_GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 166 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 167 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) 168 ; GFX908_GFX11-NEXT: S_ENDPGM 0 169 ; 170 ; GFX90A_GFX940-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn 171 ; GFX90A_GFX940: bb.1 (%ir-block.0): 172 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 173 ; GFX90A_GFX940-NEXT: {{ $}} 174 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 175 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 176 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 177 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 178 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 179 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 180 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 181 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 182 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 183 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 184 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) 185 ; GFX90A_GFX940-NEXT: S_ENDPGM 0 186 ; 187 ; GFX12-LABEL: name: buffer_atomic_fadd_f32_bothen_no_rtn 188 ; GFX12: bb.1 (%ir-block.0): 189 ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 190 ; GFX12-NEXT: {{ $}} 191 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 192 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 193 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 194 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 195 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 196 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 197 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 198 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 199 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 200 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 201 ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8) 202 ; GFX12-NEXT: S_ENDPGM 0 203 %ret = call float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float %val, <4 x i32> %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) 204 ret void 205} 206 207define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offset_no_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 inreg %soffset) { 208 ; GFX908_GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn 209 ; GFX908_GFX11: bb.1 (%ir-block.0): 210 ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 211 ; GFX908_GFX11-NEXT: {{ $}} 212 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 213 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 214 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 215 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 216 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 217 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 218 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 219 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 220 ; GFX908_GFX11-NEXT: S_ENDPGM 0 221 ; 222 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn 223 ; GFX90A_GFX940: bb.1 (%ir-block.0): 224 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 225 ; GFX90A_GFX940-NEXT: {{ $}} 226 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 227 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 228 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 229 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 230 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 231 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 232 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 233 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 234 ; GFX90A_GFX940-NEXT: S_ENDPGM 0 235 ; 236 ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_offset_no_rtn 237 ; GFX12: bb.1 (%ir-block.0): 238 ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0 239 ; GFX12-NEXT: {{ $}} 240 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 241 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 242 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 243 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 244 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 245 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr4 246 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 247 ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFSET [[COPY]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 248 ; GFX12-NEXT: S_ENDPGM 0 249 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 0, i32 %soffset, i32 0) 250 ret void 251} 252 253define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_offen_no_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 254 ; GFX908_GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn 255 ; GFX908_GFX11: bb.1 (%ir-block.0): 256 ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 257 ; GFX908_GFX11-NEXT: {{ $}} 258 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 259 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 260 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 261 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 262 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 263 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 264 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 265 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 266 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 267 ; GFX908_GFX11-NEXT: S_ENDPGM 0 268 ; 269 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn 270 ; GFX90A_GFX940: bb.1 (%ir-block.0): 271 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 272 ; GFX90A_GFX940-NEXT: {{ $}} 273 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 274 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 275 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 276 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 277 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 278 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 279 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 280 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 281 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 282 ; GFX90A_GFX940-NEXT: S_ENDPGM 0 283 ; 284 ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_offen_no_rtn 285 ; GFX12: bb.1 (%ir-block.0): 286 ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 287 ; GFX12-NEXT: {{ $}} 288 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 289 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 290 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 291 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 292 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 293 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 294 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 295 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 296 ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 297 ; GFX12-NEXT: S_ENDPGM 0 298 %ret = call float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 299 ret void 300} 301 302define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_idxen_no_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 inreg %soffset) { 303 ; GFX908_GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn 304 ; GFX908_GFX11: bb.1 (%ir-block.0): 305 ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 306 ; GFX908_GFX11-NEXT: {{ $}} 307 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 308 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 309 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 310 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 311 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 312 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 313 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 314 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 315 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 316 ; GFX908_GFX11-NEXT: S_ENDPGM 0 317 ; 318 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn 319 ; GFX90A_GFX940: bb.1 (%ir-block.0): 320 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 321 ; GFX90A_GFX940-NEXT: {{ $}} 322 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 323 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 324 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 325 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 326 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 327 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 328 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 329 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 330 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 331 ; GFX90A_GFX940-NEXT: S_ENDPGM 0 332 ; 333 ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_idxen_no_rtn 334 ; GFX12: bb.1 (%ir-block.0): 335 ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1 336 ; GFX12-NEXT: {{ $}} 337 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 338 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 339 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 340 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 341 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 342 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 343 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 344 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 345 ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_IDXEN [[COPY]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 346 ; GFX12-NEXT: S_ENDPGM 0 347 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 0, i32 %soffset, i32 0) 348 ret void 349} 350 351define amdgpu_ps void @buffer_ptr_atomic_fadd_f32_bothen_no_rtn(float %val, ptr addrspace(8) inreg %rsrc, i32 %vindex, i32 %voffset, i32 inreg %soffset) { 352 ; GFX908_GFX11-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn 353 ; GFX908_GFX11: bb.1 (%ir-block.0): 354 ; GFX908_GFX11-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 355 ; GFX908_GFX11-NEXT: {{ $}} 356 ; GFX908_GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 357 ; GFX908_GFX11-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 358 ; GFX908_GFX11-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 359 ; GFX908_GFX11-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 360 ; GFX908_GFX11-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 361 ; GFX908_GFX11-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 362 ; GFX908_GFX11-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 363 ; GFX908_GFX11-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 364 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 365 ; GFX908_GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 366 ; GFX908_GFX11-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 367 ; GFX908_GFX11-NEXT: S_ENDPGM 0 368 ; 369 ; GFX90A_GFX940-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn 370 ; GFX90A_GFX940: bb.1 (%ir-block.0): 371 ; GFX90A_GFX940-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 372 ; GFX90A_GFX940-NEXT: {{ $}} 373 ; GFX90A_GFX940-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 374 ; GFX90A_GFX940-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 375 ; GFX90A_GFX940-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 376 ; GFX90A_GFX940-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 377 ; GFX90A_GFX940-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 378 ; GFX90A_GFX940-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 379 ; GFX90A_GFX940-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 380 ; GFX90A_GFX940-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 381 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 382 ; GFX90A_GFX940-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 383 ; GFX90A_GFX940-NEXT: BUFFER_ATOMIC_ADD_F32_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 384 ; GFX90A_GFX940-NEXT: S_ENDPGM 0 385 ; 386 ; GFX12-LABEL: name: buffer_ptr_atomic_fadd_f32_bothen_no_rtn 387 ; GFX12: bb.1 (%ir-block.0): 388 ; GFX12-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2 389 ; GFX12-NEXT: {{ $}} 390 ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 391 ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr0 392 ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr1 393 ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr2 394 ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr3 395 ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 396 ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 397 ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr4 398 ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY4]], %subreg.sub3 399 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY6]], %subreg.sub1 400 ; GFX12-NEXT: BUFFER_ATOMIC_ADD_F32_VBUFFER_BOTHEN [[COPY]], [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[COPY7]], 0, 2, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 401 ; GFX12-NEXT: S_ENDPGM 0 402 %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 2) 403 ret void 404} 405 406declare float @llvm.amdgcn.raw.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32 immarg) 407declare float @llvm.amdgcn.struct.buffer.atomic.fadd.f32(float, <4 x i32>, i32, i32, i32, i32 immarg) 408 409declare float @llvm.amdgcn.raw.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32 immarg) 410declare float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float, ptr addrspace(8), i32, i32, i32, i32 immarg) 411