; RUN: opt -passes=amdgpu-attributor < %s | llc | FileCheck %s

target triple = "amdgcn-amd-amdhsa"

; The call to intrinsic implicitarg_ptr reaches a load through a phi. The
; offsets of the phi cannot be determined, and hence the attributor assumes that
; hostcall is in use.

; CHECK-LABEL: amdhsa.kernels:
; CHECK: .value_kind: hidden_hostcall_buffer
; CHECK: .value_kind: hidden_multigrid_sync_arg
; CHECK-LABEL: .name: kernel_1

define amdgpu_kernel void @kernel_1(ptr addrspace(1) %a, i64 %index1, i64 %index2, i1 %cond) {
entry:
  %tmp7 = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  br i1 %cond, label %old, label %new

old:                                              ; preds = %entry
  %tmp4 = getelementptr i8, ptr addrspace(4) %tmp7, i64 %index1
  br label %join

new:                                              ; preds = %entry
  %tmp12 = getelementptr inbounds i8, ptr addrspace(4) %tmp7, i64 %index2
  br label %join

join:                                             ; preds = %new, %old
  %.in.in.in = phi ptr addrspace(4) [ %tmp12, %new ], [ %tmp4, %old ]

  ;;; THIS USE is where the offset into implicitarg_ptr is unknown
  %.in = load i16, ptr addrspace(4) %.in.in.in, align 2

  %idx.ext = sext i16 %.in to i64
  %add.ptr3 = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %idx.ext
  %tmp16 = atomicrmw add ptr addrspace(1) %add.ptr3, i32 15 syncscope("agent-one-as") monotonic, align 4
  ret void
}

; The call to intrinsic implicitarg_ptr is combined with an offset produced by
; select'ing between two constants, before it is eventually used in a GEP to
; form the address of a load. This test ensures that AAPointerInfo can look
; through the select to maintain a set of indices, so that it can precisely
; determine that hostcall and other expensive implicit args are not in use.

; CHECK-NOT: hidden_hostcall_buffer
; CHECK-NOT: hidden_multigrid_sync_arg
; CHECK-LABEL: .name: kernel_2

define amdgpu_kernel void @kernel_2(ptr addrspace(1) %a, i1 %cond) {
entry:
  %tmp7 = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %tmp5 = select i1 %cond, i64 12, i64 18
  %tmp6 = getelementptr inbounds i8, ptr addrspace(4) %tmp7, i64 %tmp5

  ;;; THIS USE is where multiple offsets are possible, relative to implicitarg_ptr
  %tmp9 = load i16, ptr addrspace(4) %tmp6, align 2

  %idx.ext = sext i16 %tmp9 to i64
  %add.ptr3 = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %idx.ext
  %tmp16 = atomicrmw add ptr addrspace(1) %add.ptr3, i32 15 syncscope("agent-one-as") monotonic, align 4
  ret void
}

; Same control-flow shape as kernel_1 (the load address is a phi of two GEPs
; off implicitarg_ptr), but here both GEPs use constant byte offsets (12 and
; 18). The expectation below is that neither the hostcall buffer nor the
; multigrid sync argument is reported for this kernel.

; CHECK-NOT: hidden_hostcall_buffer
; CHECK-NOT: hidden_multigrid_sync_arg
; CHECK-LABEL: .name: kernel_3

define amdgpu_kernel void @kernel_3(ptr addrspace(1) %a, i1 %cond) {
entry:
  %tmp7 = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  br i1 %cond, label %old, label %new

old:                                              ; preds = %entry
  %tmp4 = getelementptr i8, ptr addrspace(4) %tmp7, i64 12
  br label %join

new:                                              ; preds = %entry
  %tmp12 = getelementptr inbounds i8, ptr addrspace(4) %tmp7, i64 18
  br label %join

join:                                             ; preds = %new, %old
  %.in.in.in = phi ptr addrspace(4) [ %tmp12, %new ], [ %tmp4, %old ]

  ;;; THIS USE of implicitarg_ptr should not produce hostcall metadata
  %.in = load i16, ptr addrspace(4) %.in.in.in, align 2

  %idx.ext = sext i16 %.in to i64
  %add.ptr3 = getelementptr inbounds i32, ptr addrspace(1) %a, i64 %idx.ext
  %tmp16 = atomicrmw add ptr addrspace(1) %add.ptr3, i32 15 syncscope("agent-one-as") monotonic, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()

declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()

declare i32 @llvm.amdgcn.workgroup.id.x()