; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX12 %s

; Natural mapping
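; The value, compare value, and voffset arrive in VGPRs while the resource
; descriptor and soffset arrive in SGPRs, matching the register banks the
; MUBUF atomic expects, so no cross-bank copies or waterfall loop are needed.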
define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i32 %val, i32 %cmp, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
  ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN]].sub0
  ; GFX12-NEXT: $vgpr0 = COPY [[COPY8]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  %cast = bitcast i32 %ret to float
  ret float %cast
}

; Natural mapping
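; Same natural mapping, but the result is unused, so the non-RTN opcode is
; selected and the final cache-policy immediate is 0 rather than 1.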
define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i32 %val, i32 %cmp, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_atomic_cmpswap_i32_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_atomic_cmpswap_i32_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
  ; GFX12-NEXT: S_ENDPGM 0
  %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; All operands need regbank legalization
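; With the descriptor (and soffset) in VGPRs the operands may be divergent,
; so a waterfall loop is emitted: V_READFIRSTLANE_B32 picks a candidate
; rsrc/soffset, V_CMP + S_AND_SAVEEXEC restrict exec to the lanes that match,
; the atomic runs for those lanes, and SI_WATERFALL_LOOP repeats bb.2 until
; every lane has been serviced, after which the saved exec mask is restored.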
define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i32 inreg %val, i32 inreg %cmp, <4 x i32> %rsrc, i32 inreg %voffset, i32 %soffset) {
  ; GFX8-LABEL: name: raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: successors: %bb.2(0x80000000)
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.2:
  ; GFX8-NEXT: successors: %bb.3(0x80000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX8-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
  ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
  ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX8-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
  ; GFX8-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.3:
  ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
  ; GFX8-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
  ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.4:
  ; GFX8-NEXT: successors: %bb.5(0x80000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.5:
  ; GFX8-NEXT: $vgpr0 = COPY [[COPY15]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: raw_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: successors: %bb.2(0x80000000)
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.2:
  ; GFX12-NEXT: successors: %bb.3(0x80000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX12-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX12-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
  ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
  ; GFX12-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX12-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
  ; GFX12-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.3:
  ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
  ; GFX12-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN]].sub0
  ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.4:
  ; GFX12-NEXT: successors: %bb.5(0x80000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]]
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.5:
  ; GFX12-NEXT: $vgpr0 = COPY [[COPY15]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  %cast = bitcast i32 %ret to float
  ret float %cast
}

; All operands need regbank legalization
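; Same waterfall structure as above; the unused result only changes the
; opcode inside the loop body to the non-RTN form.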
define amdgpu_ps void @raw_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i32 inreg %val, i32 inreg %cmp, <4 x i32> %rsrc, i32 inreg %voffset, i32 %soffset) {
  ; GFX8-LABEL: name: raw_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: successors: %bb.2(0x80000000)
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.2:
  ; GFX8-NEXT: successors: %bb.3(0x80000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX8-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX8-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
  ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
  ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX8-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
  ; GFX8-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.3:
  ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
  ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
  ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.4:
  ; GFX8-NEXT: successors: %bb.5(0x80000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.5:
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: successors: %bb.2(0x80000000)
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]]
  ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.2:
  ; GFX12-NEXT: successors: %bb.3(0x80000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1
  ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3
  ; GFX12-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1
  ; GFX12-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3
  ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec
  ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec
  ; GFX12-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX12-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec
  ; GFX12-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.3:
  ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1
  ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
  ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.4:
  ; GFX12-NEXT: successors: %bb.5(0x80000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]]
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.5:
  ; GFX12-NEXT: S_ENDPGM 0
  %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

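; The add of 4095 to the base voffset folds into the instruction's immediate
; offset operand (the 4095 below), so no add survives selection; the i64
; variant at the end of the file exercises the same fold.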
define amdgpu_ps float @raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095(i32 %val, i32 %cmp, <4 x i32> inreg %rsrc, i32 %voffset.base, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0
  ; GFX8-NEXT: $vgpr0 = COPY [[COPY8]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  ;
  ; GFX12-LABEL: name: raw_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32), align 1, addrspace 8)
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_VBUFFER_OFFEN_RTN]].sub0
  ; GFX12-NEXT: $vgpr0 = COPY [[COPY8]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %voffset = add i32 %voffset.base, 4095
  %ret = call i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  %cast = bitcast i32 %ret to float
  ret float %cast
}

; Natural mapping
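; The i64 tests select the X2 form: the 64-bit value and compare value are
; packed into one vreg_128 with REG_SEQUENCE, and the 64-bit result is
; returned through $sgpr0/$sgpr1 via V_READFIRSTLANE_B32.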
define amdgpu_ps double @raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
  ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
  ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1
  ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0
  ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
  ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
  ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX12-LABEL: name: raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
  ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
  ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN]].sub0_sub1
  ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0
  ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
  ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
  ; GFX12-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  %cast = bitcast i64 %ret to double
  ret double %cast
}

; Natural mapping
define amdgpu_ps void @raw_buffer_atomic_cmpswap_i64_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, <4 x i32> inreg %rsrc, i32 %voffset, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_atomic_cmpswap_i64_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
  ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_atomic_cmpswap_i64_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
  ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
  ; GFX12-NEXT: S_ENDPGM 0
  %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

; All operands need regbank legalization
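; 64-bit waterfall case: the SGPR val/cmp pairs are copied into vreg_64
; registers up front, since the data operand must be in vector registers,
; and then the same waterfall loop as in the i32 tests legalizes the
; divergent descriptor and soffset.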
define amdgpu_ps double @raw_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i64 inreg %val, i64 inreg %cmp, <4 x i32> %rsrc, i32 inreg %voffset, i32 %soffset) {
  ; GFX8-LABEL: name: raw_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: successors: %bb.2(0x80000000)
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
  ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
  ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.2:
  ; GFX8-NEXT: successors: %bb.3(0x80000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1
  ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
  ; GFX8-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub0_sub1
  ; GFX8-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub2_sub3
  ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
  ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
  ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
  ; GFX8-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY9]], implicit $exec
  ; GFX8-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.3:
  ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3
  ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
  ; GFX8-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1
  ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.4:
  ; GFX8-NEXT: successors: %bb.5(0x80000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.5:
  ; GFX8-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub0
  ; GFX8-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub1
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
  ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_5]]
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
  ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_6]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX12-LABEL: name: raw_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: successors: %bb.2(0x80000000)
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
  ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
  ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.2:
  ; GFX12-NEXT: successors: %bb.3(0x80000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1
  ; GFX12-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
  ; GFX12-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub0_sub1
  ; GFX12-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub2_sub3
  ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
  ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
  ; GFX12-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
  ; GFX12-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY9]], implicit $exec
  ; GFX12-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.3:
  ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3
  ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
  ; GFX12-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN]].sub0_sub1
  ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.4:
  ; GFX12-NEXT: successors: %bb.5(0x80000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]]
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.5:
  ; GFX12-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub0
  ; GFX12-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub1
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec
  ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_5]]
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec
  ; GFX12-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_6]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  %cast = bitcast i64 %ret to double
  ret double %cast
}

; All operands need regbank legalization
define amdgpu_ps void @raw_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i64 inreg %val, i64 inreg %cmp, <4 x i32> %rsrc, i32 inreg %voffset, i32 %soffset) {
  ; GFX8-LABEL: name: raw_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: successors: %bb.2(0x80000000)
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
  ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
  ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX8-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.2:
  ; GFX8-NEXT: successors: %bb.3(0x80000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX8-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1
  ; GFX8-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
  ; GFX8-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub0_sub1
  ; GFX8-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub2_sub3
  ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
  ; GFX8-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
  ; GFX8-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
  ; GFX8-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY9]], implicit $exec
  ; GFX8-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; GFX8-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.3:
  ; GFX8-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3
  ; GFX8-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
  ; GFX8-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
  ; GFX8-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.4:
  ; GFX8-NEXT: successors: %bb.5(0x80000000)
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]]
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: bb.5:
  ; GFX8-NEXT: S_ENDPGM 0
  ;
  ; GFX12-LABEL: name: raw_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: successors: %bb.2(0x80000000)
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]]
  ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]]
  ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]]
  ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.2:
  ; GFX12-NEXT: successors: %bb.3(0x80000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec
  ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
  ; GFX12-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1
  ; GFX12-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3
  ; GFX12-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub0_sub1
  ; GFX12-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub2_sub3
  ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec
  ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec
  ; GFX12-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec
  ; GFX12-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY9]], implicit $exec
  ; GFX12-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
  ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_1]], implicit-def $exec, implicit-def $scc, implicit $exec
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.3:
  ; GFX12-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3
  ; GFX12-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
  ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc
  ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.4:
  ; GFX12-NEXT: successors: %bb.5(0x80000000)
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: $exec_lo = S_MOV_B32_term [[S_MOV_B32_]]
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: bb.5:
  ; GFX12-NEXT: S_ENDPGM 0
  %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  ret void
}

define amdgpu_ps double @raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095(i64 %val, i64 %cmp, <4 x i32> inreg %rsrc, i32 %voffset.base, i32 inreg %soffset) {
  ; GFX8-LABEL: name: raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095
  ; GFX8: bb.1 (%ir-block.0):
  ; GFX8-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX8-NEXT: {{ $}}
  ; GFX8-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX8-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX8-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX8-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX8-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX8-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX8-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX8-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX8-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX8-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX8-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX8-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX8-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX8-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
  ; GFX8-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
  ; GFX8-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1
  ; GFX8-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0
  ; GFX8-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
  ; GFX8-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX8-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
  ; GFX8-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ;
  ; GFX12-LABEL: name: raw_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095
  ; GFX12: bb.1 (%ir-block.0):
  ; GFX12-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; GFX12-NEXT: {{ $}}
  ; GFX12-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; GFX12-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
  ; GFX12-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; GFX12-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
  ; GFX12-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2
  ; GFX12-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3
  ; GFX12-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4
  ; GFX12-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5
  ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
  ; GFX12-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; GFX12-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
  ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3
  ; GFX12-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s64), align 1, addrspace 8)
  ; GFX12-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_VBUFFER_OFFEN_RTN]].sub0_sub1
  ; GFX12-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0
  ; GFX12-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec
  ; GFX12-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]]
  ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec
  ; GFX12-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]]
  ; GFX12-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %voffset = add i32 %voffset.base, 4095
  %ret = call i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
  %cast = bitcast i64 %ret to double
  ret double %cast
}

declare i32 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i32(i32, i32, <4 x i32>, i32, i32, i32 immarg)
declare i64 @llvm.amdgcn.raw.buffer.atomic.cmpswap.i64(i64, i64, <4 x i32>, i32, i32, i32 immarg)