; Codegen test for the llvm.amdgcn.raw.ptr.buffer.atomic.* intrinsics, run
; through llc for two AMDGPU subtargets (verde and tonga) with the atomic
; optimizer strategy set to None. Both runs share the default check prefix.
;RUN: llc < %s -mtriple=amdgcn -mcpu=verde -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs | FileCheck %s
;RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs | FileCheck %s

; test1: a chain of atomic swaps exercising the addressing forms:
;   - no offset at all                        -> "off ... 0"
;   - variable voffset                        -> "offen"
;   - voffset + 42                            -> "offen offset:42"
;   - voffset 4 with soffset 8188 (0x1ffc)    -> soffset in an SGPR, "offset:4"
; The swap whose result is unused is expected without "glc"; the final f32
; swap, whose result is returned, is expected with "glc".
;CHECK-LABEL: {{^}}test1:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_atomic_swap v0, off, s[0:3], 0 glc
;CHECK: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_swap v0, v1, s[0:3], 0 offen glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_swap v0, v1, s[0:3], 0 offen offset:42 glc
;CHECK-DAG: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_swap v0, off, s[0:3], [[SOFS]] offset:4 glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_swap v0, off, s[0:3], 0{{$}}
;CHECK: buffer_atomic_swap v0, off, s[0:3], 0 glc
define amdgpu_ps float @test1(ptr addrspace(8) inreg %rsrc, i32 %data, i32 %voffset) {
main_body:
  %o1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 %data, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  %o3 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 %o1, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %off5 = add i32 %voffset, 42
  %o5 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 %o3, ptr addrspace(8) %rsrc, i32 %off5, i32 0, i32 0)
  %o6 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 %o5, ptr addrspace(8) %rsrc, i32 4, i32 8188, i32 0)
  %unused = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32 %o6, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  %o7 = bitcast i32 %o6 to float
  %out = call float @llvm.amdgcn.raw.ptr.buffer.atomic.swap.f32(float %o7, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  ret float %out
}

; test2: one call per integer atomic operation (add, sub, smin, umin, smax,
; umax, and, or, xor, inc, dec), each fed by the previous result. Calls that
; pass 2 as the last (cache policy) argument are expected to print "slc" in
; addition to "glc"; calls passing 0 are expected with "glc" only.
;CHECK-LABEL: {{^}}test2:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_atomic_add v0, v1, s[0:3], 0 offen glc{{$}}
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_sub v0, v1, s[0:3], 0 offen glc slc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_smin v0, v1, s[0:3], 0 offen glc{{$}}
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_umin v0, v1, s[0:3], 0 offen glc slc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_smax v0, v1, s[0:3], 0 offen glc{{$}}
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_umax v0, v1, s[0:3], 0 offen glc slc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_and v0, v1, s[0:3], 0 offen glc{{$}}
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_or v0, v1, s[0:3], 0 offen glc slc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_xor v0, v1, s[0:3], 0 offen glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_inc v0, v1, s[0:3], 0 offen glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_dec v0, v1, s[0:3], 0 offen glc
define amdgpu_ps float @test2(ptr addrspace(8) inreg %rsrc, i32 %data, i32 %voffset) {
main_body:
  %t1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %data, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %t2 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32 %t1, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 2)
  %t3 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32 %t2, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %t4 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32 %t3, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 2)
  %t5 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32 %t4, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %t6 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32 %t5, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 2)
  %t7 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32 %t6, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %t8 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32 %t7, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 2)
  %t9 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32 %t8, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %t10 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.inc.i32(i32 %t9, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %t11 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.dec.i32(i32 %t10, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %out = bitcast i32 %t11 to float
  ret float %out
}

; test3: 32-bit compare-and-swap with the same addressing forms as test1
; (here the folded immediate is 44 rather than 42).
;
; Ideally, we would teach tablegen & friends that cmpswap only modifies the
; first vgpr. Since we don't do that yet, the register allocator will have to
; create copies which we don't bother to track here.
;
;CHECK-LABEL: {{^}}test3:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v2, s[0:3], 0 offen glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, v2, s[0:3], 0 offen offset:44 glc
;CHECK-DAG: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_cmpswap {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[SOFS]] offset:4 glc
define amdgpu_ps float @test3(ptr addrspace(8) inreg %rsrc, i32 %data, i32 %cmp, i32 %vindex, i32 %voffset) {
main_body:
  %o1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %data, i32 %cmp, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  %o3 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %o1, i32 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %ofs.5 = add i32 %voffset, 44
  %o5 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %o3, i32 %cmp, ptr addrspace(8) %rsrc, i32 %ofs.5, i32 0, i32 0)
  %o6 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %o5, i32 %cmp, ptr addrspace(8) %rsrc, i32 4, i32 8188, i32 0)

; Detecting the no-return variant doesn't work right now because of how the
; intrinsic is replaced by an instruction that feeds into an EXTRACT_SUBREG.
; Since there probably isn't a reasonable use-case of cmpswap that discards
; the return value, that seems okay.
;
;  %unused = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32 %o6, i32 %cmp, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  %out = bitcast i32 %o6 to float
  ret float %out
}

; test4: atomic add on an undef resource descriptor with constant operands;
; only the opcode and the v0 data register are matched.
;CHECK-LABEL: {{^}}test4:
;CHECK: buffer_atomic_add v0,
define amdgpu_ps float @test4() {
main_body:
  %v = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 1, ptr addrspace(8) undef, i32 4, i32 0, i32 0)
  %v.float = bitcast i32 %v to float
  ret float %v.float
}

; test5: 64-bit compare-and-swap (the _x2 opcode), mirroring test3's
; sequence of addressing forms.
;CHECK-LABEL: {{^}}test5:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 0 glc
;CHECK-DAG: s_waitcnt vmcnt(0)
;CHECK-DAG: s_movk_i32 [[SOFS:s[0-9]+]], 0x1ffc
;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, v4, s[0:3], 0 offen glc
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, v4, s[0:3], 0 offen offset:44 glc
;CHECK-DAG: s_waitcnt vmcnt(0)
;CHECK: buffer_atomic_cmpswap_x2 {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[SOFS]] offset:4 glc
define amdgpu_ps float @test5(ptr addrspace(8) inreg %rsrc, i64 %data, i64 %cmp, i32 %vindex, i32 %voffset) {
main_body:
  %o1 = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %data, i64 %cmp, ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0)
  %o3 = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %o1, i64 %cmp, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 0)
  %ofs.5 = add i32 %voffset, 44
  %o5 = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %o3, i64 %cmp, ptr addrspace(8) %rsrc, i32 %ofs.5, i32 0, i32 0)
  %o6 = call i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64 %o5, i64 %cmp, ptr addrspace(8) %rsrc, i32 4, i32 8188, i32 0)
  %out = sitofp i64 %o6 to float
  ret float %out
}

; test_volatile: the last (cache policy) argument is -2147483648, i.e. only
; bit 31 is set (presumably the volatile marker, going by the test name).
; The expected instruction carries "glc" and nothing after it.
;CHECK-LABEL: {{^}}test_volatile:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_atomic_add v0, v1, s[0:3], 0 offen glc{{$}}
;CHECK-DAG: s_waitcnt vmcnt(0)
define amdgpu_ps float @test_volatile(ptr addrspace(8) inreg %rsrc, i32 %data, i32 %voffset) {
main_body:
  %t1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %data, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 -2147483648)
  %out = bitcast i32 %t1 to float
  ret float %out
}

; test_volatile_noret: same call as test_volatile but the result is unused
; and the function returns void; the expected instruction has no "glc".
;CHECK-LABEL: {{^}}test_volatile_noret:
;CHECK-NOT: s_waitcnt
;CHECK: buffer_atomic_add v0, v1, s[0:3], 0 offen{{$}}
define amdgpu_ps void @test_volatile_noret(ptr addrspace(8) inreg %rsrc, i32 %data, i32 %voffset) {
main_body:
  %t1 = call i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32 %data, ptr addrspace(8) %rsrc, i32 %voffset, i32 0, i32 -2147483648)
  ret void
}

; Intrinsic declarations. Operand order as used by the calls above:
; (data[, cmp], rsrc, voffset, soffset, cache policy).
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.swap.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare float @llvm.amdgcn.raw.ptr.buffer.atomic.swap.f32(float, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.add.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.sub.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smin.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umin.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.smax.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.umax.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.and.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.or.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.xor.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.inc.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.dec.i32(i32, ptr addrspace(8), i32, i32, i32) #0
declare i32 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i32(i32, i32, ptr addrspace(8), i32, i32, i32) #0
declare i64 @llvm.amdgcn.raw.ptr.buffer.atomic.cmpswap.i64(i64, i64, ptr addrspace(8), i32, i32, i32) #0

attributes #0 = { nounwind }