1faa2c678SKrzysztof Drewniak; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2faa2c678SKrzysztof Drewniak; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -stop-after=instruction-select -verify-machineinstrs -o - %s | FileCheck %s 3faa2c678SKrzysztof Drewniak 4faa2c678SKrzysztof Drewniak; Natural mapping 5faa2c678SKrzysztof Drewniakdefine amdgpu_ps float @raw_ptr_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i32 %val, i32 %cmp, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 6faa2c678SKrzysztof Drewniak ; CHECK-LABEL: name: raw_ptr_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset 7faa2c678SKrzysztof Drewniak ; CHECK: bb.1 (%ir-block.0): 8faa2c678SKrzysztof Drewniak ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 9faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 10faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 11faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 12faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 13faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 14faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 15faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 16faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 17faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 18faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 19faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 20ab379378SKrzysztof Drewniak ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 21faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 22faa2c678SKrzysztof Drewniak ; CHECK-NEXT: $vgpr0 = COPY [[COPY8]] 23faa2c678SKrzysztof Drewniak ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 24faa2c678SKrzysztof Drewniak %ret = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 25faa2c678SKrzysztof Drewniak %cast = bitcast i32 %ret to float 26faa2c678SKrzysztof Drewniak ret float %cast 27faa2c678SKrzysztof Drewniak} 28faa2c678SKrzysztof Drewniak 29faa2c678SKrzysztof Drewniak; Natural mapping 30faa2c678SKrzysztof Drewniakdefine amdgpu_ps void @raw_ptr_buffer_atomic_cmpswap_i32_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i32 %val, i32 %cmp, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 31faa2c678SKrzysztof Drewniak ; CHECK-LABEL: name: raw_ptr_buffer_atomic_cmpswap_i32_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset 32faa2c678SKrzysztof Drewniak ; CHECK: bb.1 (%ir-block.0): 33faa2c678SKrzysztof Drewniak ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 34faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 35faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 36faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 37faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 38faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 39faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 40faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 41faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 42faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 43faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 44faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 45ab379378SKrzysztof Drewniak ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 46faa2c678SKrzysztof Drewniak ; CHECK-NEXT: S_ENDPGM 0 47faa2c678SKrzysztof Drewniak %ret = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 48faa2c678SKrzysztof Drewniak ret void 49faa2c678SKrzysztof Drewniak} 50faa2c678SKrzysztof Drewniak 51faa2c678SKrzysztof Drewniak; All operands need regbank legalization 52faa2c678SKrzysztof Drewniakdefine amdgpu_ps float @raw_ptr_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i32 inreg %val, i32 inreg %cmp, ptr addrspace(8) %rsrc, i32 inreg %voffset, i32 %soffset) { 53faa2c678SKrzysztof Drewniak ; CHECK-LABEL: name: raw_ptr_buffer_atomic_cmpswap_i32__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset 54faa2c678SKrzysztof Drewniak ; CHECK: bb.1 (%ir-block.0): 55faa2c678SKrzysztof Drewniak ; CHECK-NEXT: successors: %bb.2(0x80000000) 56faa2c678SKrzysztof Drewniak ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 57faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 58faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 59faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 60faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 61faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 62faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 63faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3 64faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 65faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 66faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 67faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] 68faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] 69faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] 70faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 71faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 72faa2c678SKrzysztof Drewniak ; CHECK-NEXT: bb.2: 73faa2c678SKrzysztof Drewniak ; CHECK-NEXT: successors: %bb.3(0x80000000) 74faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 75faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec 76faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec 77faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 78faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 79faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 80faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 81faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 82faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 83faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 84faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec 85faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec 86c3cfbbc4Spvanhout ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc 87faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec 88faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec 89c3cfbbc4Spvanhout ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc 90faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 91faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 92faa2c678SKrzysztof Drewniak ; CHECK-NEXT: bb.3: 93faa2c678SKrzysztof Drewniak ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) 94faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 95faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 96ab379378SKrzysztof Drewniak ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 97faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 98faa2c678SKrzysztof Drewniak ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 99faa2c678SKrzysztof Drewniak ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec 100faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 101faa2c678SKrzysztof Drewniak ; CHECK-NEXT: bb.4: 102faa2c678SKrzysztof Drewniak ; CHECK-NEXT: successors: %bb.5(0x80000000) 103faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 104faa2c678SKrzysztof Drewniak ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] 105faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 106faa2c678SKrzysztof Drewniak ; CHECK-NEXT: bb.5: 107faa2c678SKrzysztof Drewniak ; CHECK-NEXT: $vgpr0 = COPY [[COPY15]] 108faa2c678SKrzysztof Drewniak ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 109faa2c678SKrzysztof Drewniak %ret = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 110faa2c678SKrzysztof Drewniak %cast = bitcast i32 %ret to float 111faa2c678SKrzysztof Drewniak ret float %cast 112faa2c678SKrzysztof Drewniak} 113faa2c678SKrzysztof Drewniak 114faa2c678SKrzysztof Drewniak; All operands need regbank legalization 115faa2c678SKrzysztof Drewniakdefine amdgpu_ps void @raw_ptr_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i32 inreg %val, i32 inreg %cmp, ptr addrspace(8) %rsrc, i32 inreg %voffset, i32 %soffset) { 116faa2c678SKrzysztof Drewniak ; CHECK-LABEL: name: raw_ptr_buffer_atomic_cmpswap_i32_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset 117faa2c678SKrzysztof Drewniak ; CHECK: bb.1 (%ir-block.0): 118faa2c678SKrzysztof Drewniak ; CHECK-NEXT: successors: %bb.2(0x80000000) 119faa2c678SKrzysztof Drewniak ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 120faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 121faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 122faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 123faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0 124faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 125faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr2 126faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr3 127faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 128faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr4 129faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 130faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY]] 131faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[COPY1]] 132faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY [[COPY6]] 133faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 134faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 135faa2c678SKrzysztof Drewniak ; CHECK-NEXT: bb.2: 136faa2c678SKrzysztof Drewniak ; CHECK-NEXT: successors: %bb.3(0x80000000) 137faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 138faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY2]], implicit $exec 139faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY3]], implicit $exec 140faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 141faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 142faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 143faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 144faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub2_sub3 145faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY13:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub0_sub1 146faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY14:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE1]].sub2_sub3 147faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY13]], [[COPY11]], implicit $exec 148faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY14]], [[COPY12]], implicit $exec 149c3cfbbc4Spvanhout ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc 150faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec 151faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY7]], implicit $exec 152c3cfbbc4Spvanhout ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc 153faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 154faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 155faa2c678SKrzysztof Drewniak ; CHECK-NEXT: bb.3: 156faa2c678SKrzysztof Drewniak ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) 157faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 158faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY8]], %subreg.sub0, [[COPY9]], %subreg.sub1 159ab379378SKrzysztof Drewniak ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_OFFEN [[REG_SEQUENCE2]], [[COPY10]], [[REG_SEQUENCE1]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 160faa2c678SKrzysztof Drewniak ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 161faa2c678SKrzysztof Drewniak ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec 162faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 163faa2c678SKrzysztof Drewniak ; CHECK-NEXT: bb.4: 164faa2c678SKrzysztof Drewniak ; CHECK-NEXT: successors: %bb.5(0x80000000) 165faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 166faa2c678SKrzysztof Drewniak ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] 167faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 168faa2c678SKrzysztof Drewniak ; CHECK-NEXT: bb.5: 169faa2c678SKrzysztof Drewniak ; CHECK-NEXT: S_ENDPGM 0 170faa2c678SKrzysztof Drewniak %ret = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 171faa2c678SKrzysztof Drewniak ret void 172faa2c678SKrzysztof Drewniak} 173faa2c678SKrzysztof Drewniak 174faa2c678SKrzysztof Drewniakdefine amdgpu_ps float @raw_ptr_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095(i32 %val, i32 %cmp, ptr addrspace(8) inreg %rsrc, i32 %voffset.base, i32 inreg %soffset) { 175faa2c678SKrzysztof Drewniak ; CHECK-LABEL: name: raw_ptr_buffer_atomic_cmpswap_i32__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095 176faa2c678SKrzysztof Drewniak ; CHECK: bb.1 (%ir-block.0): 177faa2c678SKrzysztof Drewniak ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2 178faa2c678SKrzysztof Drewniak ; CHECK-NEXT: {{ $}} 179faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 180faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 181faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr2 182faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr3 183faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr4 184faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr5 185faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 186faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6 187faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1, [[COPY4]], %subreg.sub2, [[COPY5]], %subreg.sub3 188faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 189ab379378SKrzysztof Drewniak ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN:%[0-9]+]]:vreg_64 = BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s32) on %ir.rsrc, align 1, addrspace 8) 190faa2c678SKrzysztof Drewniak ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_ATOMIC_CMPSWAP_OFFEN_RTN]].sub0 191faa2c678SKrzysztof Drewniak ; CHECK-NEXT: $vgpr0 = COPY [[COPY8]] 192faa2c678SKrzysztof Drewniak ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 193faa2c678SKrzysztof Drewniak %voffset = add i32 %voffset.base, 4095 194faa2c678SKrzysztof Drewniak %ret = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %val, i32 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 195faa2c678SKrzysztof Drewniak %cast = bitcast i32 %ret to float 196faa2c678SKrzysztof Drewniak ret float %cast 197faa2c678SKrzysztof Drewniak} 198faa2c678SKrzysztof Drewniak 199*101008beSJay Foad; Natural mapping 200*101008beSJay Foaddefine amdgpu_ps double @raw_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 201*101008beSJay Foad ; CHECK-LABEL: name: raw_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset 202*101008beSJay Foad ; CHECK: bb.1 (%ir-block.0): 203*101008beSJay Foad ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 204*101008beSJay Foad ; CHECK-NEXT: {{ $}} 205*101008beSJay Foad ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 206*101008beSJay Foad ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 207*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 208*101008beSJay Foad ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 209*101008beSJay Foad ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 210*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 211*101008beSJay Foad ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 212*101008beSJay Foad ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 213*101008beSJay Foad ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 214*101008beSJay Foad ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 215*101008beSJay Foad ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 216*101008beSJay Foad ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 217*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 218*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 219*101008beSJay Foad ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) 220*101008beSJay Foad ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1 221*101008beSJay Foad ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0 222*101008beSJay Foad ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1 223*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec 224*101008beSJay Foad ; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 225*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec 226*101008beSJay Foad ; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 227*101008beSJay Foad ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 228*101008beSJay Foad %ret = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 229*101008beSJay Foad %cast = bitcast i64 %ret to double 230*101008beSJay Foad ret double %cast 231*101008beSJay Foad} 232faa2c678SKrzysztof Drewniak 233*101008beSJay Foad; Natural mapping 234*101008beSJay Foaddefine amdgpu_ps void @raw_ptr_buffer_atomic_cmpswap_i64_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset(i64 %val, i64 %cmp, ptr addrspace(8) inreg %rsrc, i32 %voffset, i32 inreg %soffset) { 235*101008beSJay Foad ; CHECK-LABEL: name: raw_ptr_buffer_atomic_cmpswap_i64_noret__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset 236*101008beSJay Foad ; CHECK: bb.1 (%ir-block.0): 237*101008beSJay Foad ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 238*101008beSJay Foad ; CHECK-NEXT: {{ $}} 239*101008beSJay Foad ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 240*101008beSJay Foad ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 241*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 242*101008beSJay Foad ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 243*101008beSJay Foad ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 244*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 245*101008beSJay Foad ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 246*101008beSJay Foad ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 247*101008beSJay Foad ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 248*101008beSJay Foad ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 249*101008beSJay Foad ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 250*101008beSJay Foad ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 251*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 252*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 253*101008beSJay Foad ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) 254*101008beSJay Foad ; CHECK-NEXT: S_ENDPGM 0 255*101008beSJay Foad %ret = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 256*101008beSJay Foad ret void 257*101008beSJay Foad} 258faa2c678SKrzysztof Drewniak 259*101008beSJay Foad; All operands need regbank legalization 260*101008beSJay Foaddefine amdgpu_ps double @raw_ptr_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i64 inreg %val, i64 inreg %cmp, ptr addrspace(8) %rsrc, i32 inreg %voffset, i32 %soffset) { 261*101008beSJay Foad ; CHECK-LABEL: name: raw_ptr_buffer_atomic_cmpswap_i64__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset 262*101008beSJay Foad ; CHECK: bb.1 (%ir-block.0): 263*101008beSJay Foad ; CHECK-NEXT: successors: %bb.2(0x80000000) 264*101008beSJay Foad ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 265*101008beSJay Foad ; CHECK-NEXT: {{ $}} 266*101008beSJay Foad ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 267*101008beSJay Foad ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 268*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 269*101008beSJay Foad ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 270*101008beSJay Foad ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 271*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 272*101008beSJay Foad ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 273*101008beSJay Foad ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 274*101008beSJay Foad ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 275*101008beSJay Foad ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 276*101008beSJay Foad ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 277*101008beSJay Foad ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr4 278*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 279*101008beSJay Foad ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] 280*101008beSJay Foad ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] 281*101008beSJay Foad ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] 282*101008beSJay Foad ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 283*101008beSJay Foad ; CHECK-NEXT: {{ $}} 284*101008beSJay Foad ; CHECK-NEXT: bb.2: 285*101008beSJay Foad ; CHECK-NEXT: successors: %bb.3(0x80000000) 286*101008beSJay Foad ; CHECK-NEXT: {{ $}} 287*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 288*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 289*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec 290*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec 291*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 292*101008beSJay Foad ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 293*101008beSJay Foad ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 294*101008beSJay Foad ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub0_sub1 295*101008beSJay Foad ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub2_sub3 296*101008beSJay Foad ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec 297*101008beSJay Foad ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec 298*101008beSJay Foad ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc 299*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec 300*101008beSJay Foad ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY9]], implicit $exec 301*101008beSJay Foad ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc 302*101008beSJay Foad ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 303*101008beSJay Foad ; CHECK-NEXT: {{ $}} 304*101008beSJay Foad ; CHECK-NEXT: bb.3: 305*101008beSJay Foad ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) 306*101008beSJay Foad ; CHECK-NEXT: {{ $}} 307*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3 308*101008beSJay Foad ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) 309*101008beSJay Foad ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1 310*101008beSJay Foad ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 311*101008beSJay Foad ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec 312*101008beSJay Foad ; CHECK-NEXT: {{ $}} 313*101008beSJay Foad ; CHECK-NEXT: bb.4: 314*101008beSJay Foad ; CHECK-NEXT: successors: %bb.5(0x80000000) 315*101008beSJay Foad ; CHECK-NEXT: {{ $}} 316*101008beSJay Foad ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] 317*101008beSJay Foad ; CHECK-NEXT: {{ $}} 318*101008beSJay Foad ; CHECK-NEXT: bb.5: 319*101008beSJay Foad ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub0 320*101008beSJay Foad ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vgpr_32 = COPY [[COPY17]].sub1 321*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY18]], implicit $exec 322*101008beSJay Foad ; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_5]] 323*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY19]], implicit $exec 324*101008beSJay Foad ; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_6]] 325*101008beSJay Foad ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 326*101008beSJay Foad %ret = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 327*101008beSJay Foad %cast = bitcast i64 %ret to double 328*101008beSJay Foad ret double %cast 329*101008beSJay Foad} 330faa2c678SKrzysztof Drewniak 331*101008beSJay Foad; All operands need regbank legalization 332*101008beSJay Foaddefine amdgpu_ps void @raw_ptr_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset(i64 inreg %val, i64 inreg %cmp, ptr addrspace(8) %rsrc, i32 inreg %voffset, i32 %soffset) { 333*101008beSJay Foad ; CHECK-LABEL: name: raw_ptr_buffer_atomic_cmpswap_i64_noret__sgpr_val__sgpr_cmp__vgpr_rsrc__sgpr_voffset__vgpr_soffset 334*101008beSJay Foad ; CHECK: bb.1 (%ir-block.0): 335*101008beSJay Foad ; CHECK-NEXT: successors: %bb.2(0x80000000) 336*101008beSJay Foad ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 337*101008beSJay Foad ; CHECK-NEXT: {{ $}} 338*101008beSJay Foad ; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2 339*101008beSJay Foad ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3 340*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 341*101008beSJay Foad ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sreg_32 = COPY $sgpr4 342*101008beSJay Foad ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sreg_32 = COPY $sgpr5 343*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 344*101008beSJay Foad ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0 345*101008beSJay Foad ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1 346*101008beSJay Foad ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vgpr_32 = COPY $vgpr2 347*101008beSJay Foad ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY $vgpr3 348*101008beSJay Foad ; CHECK-NEXT: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6 349*101008beSJay Foad ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vgpr_32 = COPY $vgpr4 350*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 351*101008beSJay Foad ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] 352*101008beSJay Foad ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] 353*101008beSJay Foad ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY8]] 354*101008beSJay Foad ; CHECK-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec 355*101008beSJay Foad ; CHECK-NEXT: {{ $}} 356*101008beSJay Foad ; CHECK-NEXT: bb.2: 357*101008beSJay Foad ; CHECK-NEXT: successors: %bb.3(0x80000000) 358*101008beSJay Foad ; CHECK-NEXT: {{ $}} 359*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec 360*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY5]], implicit $exec 361*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec 362*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY7]], implicit $exec 363*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 364*101008beSJay Foad ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub0_sub1 365*101008beSJay Foad ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE2]].sub2_sub3 366*101008beSJay Foad ; CHECK-NEXT: [[COPY15:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub0_sub1 367*101008beSJay Foad ; CHECK-NEXT: [[COPY16:%[0-9]+]]:sreg_64 = COPY [[REG_SEQUENCE3]].sub2_sub3 368*101008beSJay Foad ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY15]], [[COPY13]], implicit $exec 369*101008beSJay Foad ; CHECK-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[COPY16]], [[COPY14]], implicit $exec 370*101008beSJay Foad ; CHECK-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc 371*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY9]], implicit $exec 372*101008beSJay Foad ; CHECK-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY9]], implicit $exec 373*101008beSJay Foad ; CHECK-NEXT: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[S_AND_B64_]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc 374*101008beSJay Foad ; CHECK-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec 375*101008beSJay Foad ; CHECK-NEXT: {{ $}} 376*101008beSJay Foad ; CHECK-NEXT: bb.3: 377*101008beSJay Foad ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) 378*101008beSJay Foad ; CHECK-NEXT: {{ $}} 379*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY10]], %subreg.sub0_sub1, [[COPY11]], %subreg.sub2_sub3 380*101008beSJay Foad ; CHECK-NEXT: BUFFER_ATOMIC_CMPSWAP_X2_OFFEN [[REG_SEQUENCE4]], [[COPY12]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) 381*101008beSJay Foad ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc 382*101008beSJay Foad ; CHECK-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec 383*101008beSJay Foad ; CHECK-NEXT: {{ $}} 384*101008beSJay Foad ; CHECK-NEXT: bb.4: 385*101008beSJay Foad ; CHECK-NEXT: successors: %bb.5(0x80000000) 386*101008beSJay Foad ; CHECK-NEXT: {{ $}} 387*101008beSJay Foad ; CHECK-NEXT: $exec = S_MOV_B64_term [[S_MOV_B64_]] 388*101008beSJay Foad ; CHECK-NEXT: {{ $}} 389*101008beSJay Foad ; CHECK-NEXT: bb.5: 390*101008beSJay Foad ; CHECK-NEXT: S_ENDPGM 0 391*101008beSJay Foad %ret = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 392*101008beSJay Foad ret void 393*101008beSJay Foad} 394faa2c678SKrzysztof Drewniak 395*101008beSJay Foaddefine amdgpu_ps double @raw_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095(i64 %val, i64 %cmp, ptr addrspace(8) inreg %rsrc, i32 %voffset.base, i32 inreg %soffset) { 396*101008beSJay Foad ; CHECK-LABEL: name: raw_ptr_buffer_atomic_cmpswap_i64__vgpr_val__vgpr_cmp__sgpr_rsrc__vgpr_voffset__sgpr_soffset__voffset_add4095 397*101008beSJay Foad ; CHECK: bb.1 (%ir-block.0): 398*101008beSJay Foad ; CHECK-NEXT: liveins: $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 399*101008beSJay Foad ; CHECK-NEXT: {{ $}} 400*101008beSJay Foad ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 401*101008beSJay Foad ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 402*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 403*101008beSJay Foad ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 404*101008beSJay Foad ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 405*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 406*101008beSJay Foad ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY $sgpr2 407*101008beSJay Foad ; CHECK-NEXT: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr3 408*101008beSJay Foad ; CHECK-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr4 409*101008beSJay Foad ; CHECK-NEXT: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr5 410*101008beSJay Foad ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vgpr_32 = COPY $vgpr4 411*101008beSJay Foad ; CHECK-NEXT: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6 412*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3 413*101008beSJay Foad ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[REG_SEQUENCE]], %subreg.sub0_sub1, [[REG_SEQUENCE1]], %subreg.sub2_sub3 414*101008beSJay Foad ; CHECK-NEXT: [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN:%[0-9]+]]:vreg_128 = BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN [[REG_SEQUENCE3]], [[COPY8]], [[REG_SEQUENCE2]], [[COPY9]], 4095, 1, implicit $exec :: (volatile dereferenceable load store (s64) on %ir.rsrc, align 1, addrspace 8) 415*101008beSJay Foad ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_64 = COPY [[BUFFER_ATOMIC_CMPSWAP_X2_OFFEN_RTN]].sub0_sub1 416*101008beSJay Foad ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub0 417*101008beSJay Foad ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vgpr_32 = COPY [[COPY10]].sub1 418*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY11]], implicit $exec 419*101008beSJay Foad ; CHECK-NEXT: $sgpr0 = COPY [[V_READFIRSTLANE_B32_]] 420*101008beSJay Foad ; CHECK-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32 = V_READFIRSTLANE_B32 [[COPY12]], implicit $exec 421*101008beSJay Foad ; CHECK-NEXT: $sgpr1 = COPY [[V_READFIRSTLANE_B32_1]] 422*101008beSJay Foad ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1 423*101008beSJay Foad %voffset = add i32 %voffset.base, 4095 424*101008beSJay Foad %ret = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %val, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 %soffset, i32 0) 425*101008beSJay Foad %cast = bitcast i64 %ret to double 426*101008beSJay Foad ret double %cast 427*101008beSJay Foad} 428*101008beSJay Foad 429*101008beSJay Foaddeclare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32 immarg) 430*101008beSJay Foaddeclare i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32 immarg) 431