; RUN: opt -mtriple amdgcn-- -passes='print<uniformity>' -disable-output %s 2>&1 | FileCheck %s

; Atomic read-modify-write results are divergent on AMDGPU: each lane may
; observe a different returned (old) value even with uniform operands.

; CHECK: DIVERGENT: %orig = atomicrmw xchg ptr %ptr, i32 %val seq_cst
define amdgpu_kernel void @test1(ptr %ptr, i32 %val) #0 {
  %orig = atomicrmw xchg ptr %ptr, i32 %val seq_cst
  store i32 %orig, ptr %ptr
  ret void
}

; CHECK: DIVERGENT: %orig = cmpxchg ptr %ptr, i32 %cmp, i32 %new seq_cst seq_cst
define amdgpu_kernel void @test2(ptr %ptr, i32 %cmp, i32 %new) {
  %orig = cmpxchg ptr %ptr, i32 %cmp, i32 %new seq_cst seq_cst
  %val = extractvalue { i32, i1 } %orig, 0
  store i32 %val, ptr %ptr
  ret void
}

; CHECK: DIVERGENT: %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %ptr, i32 %val)
define amdgpu_kernel void @test_atomic_csub_i32(ptr addrspace(1) %ptr, i32 %val) #0 {
  %ret = call i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) %ptr, i32 %val)
  store i32 %ret, ptr addrspace(1) %ptr, align 4
  ret void
}

; CHECK: DIVERGENT: %val = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p3(ptr addrspace(3) %gep, i32 %in)
define amdgpu_kernel void @test_ds_atomic_cond_sub_rtn_u32(ptr addrspace(3) %addr, i32 %in, ptr addrspace(3) %use) #0 {
entry:
  %gep = getelementptr i32, ptr addrspace(3) %addr, i32 4
  %val = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p3(ptr addrspace(3) %gep, i32 %in)
  store i32 %val, ptr addrspace(3) %use
  ret void
}

; CHECK: DIVERGENT: %val = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr %gep, i32 %in)
define amdgpu_kernel void @test_flat_atomic_cond_sub_u32(ptr %addr, i32 %in, ptr %use) #0 {
entry:
  %gep = getelementptr i32, ptr %addr, i32 4
  %val = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr %gep, i32 %in)
  store i32 %val, ptr %use
  ret void
}

; CHECK: DIVERGENT: %val = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p1(ptr addrspace(1) %gep, i32 %in)
define amdgpu_kernel void @test_global_atomic_cond_u32(ptr addrspace(1) %addr, i32 %in, ptr addrspace(1) %use) #0 {
entry:
  %gep = getelementptr i32, ptr addrspace(1) %addr, i32 4
  %val = call i32 @llvm.amdgcn.atomic.cond.sub.u32.p1(ptr addrspace(1) %gep, i32 %in)
  store i32 %val, ptr addrspace(1) %use
  ret void
}

; CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
define float @test_raw_buffer_atomic_cond_sub_u32(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
entry:
  %orig = call i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0)
  %r = bitcast i32 %orig to float
  ret float %r
}

; CHECK: DIVERGENT: %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
define float @test_struct_buffer_atomic_cond_sub_u32(<4 x i32> inreg %rsrc, i32 inreg %data) #0 {
entry:
  %orig = call i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32 %data, <4 x i32> %rsrc, i32 0, i32 0, i32 0, i32 0)
  %r = bitcast i32 %orig to float
  ret float %r
}

declare i32 @llvm.amdgcn.global.atomic.csub.p1(ptr addrspace(1) nocapture, i32) #1
declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p3(ptr addrspace(3), i32) #1
declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr, i32) #1
declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p1(ptr addrspace(1), i32) #1
declare i32 @llvm.amdgcn.raw.buffer.atomic.cond.sub.u32.i32(i32, <4 x i32>, i32, i32, i32) #1
declare i32 @llvm.amdgcn.struct.buffer.atomic.cond.sub.u32.i32(i32, <4 x i32>, i32, i32, i32, i32) #1

attributes #0 = { nounwind }
attributes #1 = { argmemonly nounwind willreturn }