; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S -inline-threshold=1 < %s | FileCheck -check-prefixes=GCN,GCN-INL1,GCN-MAXBBDEF %s
; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -O3 -S < %s | FileCheck -check-prefixes=GCN,GCN-INLDEF,GCN-MAXBBDEF %s
; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -passes='default<O3>' -S -inline-threshold=1 < %s | FileCheck -check-prefixes=GCN,GCN-INL1,GCN-MAXBBDEF %s
; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -passes='default<O3>' -S < %s | FileCheck -check-prefixes=GCN,GCN-INLDEF,GCN-MAXBBDEF %s
; RUN: opt -mtriple=amdgcn--amdhsa -data-layout=A5 -passes='default<O3>' -S -amdgpu-inline-max-bb=1 < %s | FileCheck -check-prefixes=GCN,GCN-MAXBB1 %s

; Inlining test for the amdgcn--amdhsa triple, run through the O3 pipeline
; both as -O3 and as -passes='default<O3>'.
;
; Check prefix groups selected by the run lines above
;   GCN           enabled on every run line
;   GCN-INL1      run lines that pass -inline-threshold=1
;   GCN-INLDEF    run lines that leave the inline threshold at its default
;   GCN-MAXBB1    the run line that passes -amdgpu-inline-max-bb=1
;   GCN-MAXBBDEF  run lines that do not pass -amdgpu-inline-max-bb

; Callee with a single basic block and scalar arguments only: returns y / x
; when x > 0.0 and x * y otherwise.
define coldcc float @foo(float %x, float %y) {
entry:
  %cmp = fcmp ogt float %x, 0.000000e+00
  %div = fdiv float %y, %x
  %mul = fmul float %x, %y
  %cond = select i1 %cmp, float %div, float %mul
  ret float %cond
}

; Callee taking one private (addrspace(5)) pointer and containing three basic
; blocks: replaces *p with 1.0 / *p when *p > 1.0.
define coldcc void @foo_private_ptr(ptr addrspace(5) nocapture %p) {
entry:
  %tmp1 = load float, ptr addrspace(5) %p, align 4
  %cmp = fcmp ogt float %tmp1, 1.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %div = fdiv float 1.000000e+00, %tmp1
  store float %div, ptr addrspace(5) %p, align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  ret void
}

; Callee taking two private pointers: stores 2.0 / *p1 into *p2 when
; *p1 > 1.0. Both pointers are first passed to the external @forbid_sroa.
define coldcc void @foo_private_ptr2(ptr addrspace(5) nocapture %p1, ptr addrspace(5) nocapture %p2) {
entry:
  call void @forbid_sroa(ptr addrspace(5) %p1)
  call void @forbid_sroa(ptr addrspace(5) %p2)
  %tmp1 = load float, ptr addrspace(5) %p1, align 4
  %cmp = fcmp ogt float %tmp1, 1.000000e+00
  br i1 %cmp, label %if.then, label %if.end

if.then:
  %div = fdiv float 2.000000e+00, %tmp1
  store float %div, ptr addrspace(5) %p2, align 4
  br label %if.end

if.end:
  ret void
}

; Single-block wrapper that forwards its argument to @_Z3sinf.
define float @sin_wrapper(float %x) {
bb:
  %call = tail call float @_Z3sinf(float %x)
  ret float %call
}

; Doubles *p in place. Carries attribute group #0 (noinline), so the call to
; it is expected to survive in every configuration.
define void @foo_noinline(ptr addrspace(5) nocapture %p) #0 {
entry:
  %tmp1 = load float, ptr addrspace(5) %p, align 4
  %mul = fmul float %tmp1, 2.000000e+00
  store float %mul, ptr addrspace(5) %p, align 4
  ret void
}

; Kernel calling every helper above on elements of one 64-float private
; array. The checks below expect:
;   - @foo to remain a call with -inline-threshold=1 and to be inlined with
;     the default threshold;
;   - @foo_private_ptr and @foo_private_ptr2 to be inlined unless
;     -amdgpu-inline-max-bb=1 is given, in which case the calls remain;
;   - the call to @foo_noinline to remain and @sin_wrapper to be inlined
;     (leaving a direct call to @_Z3sinf) on every run line.
; GCN: define amdgpu_kernel void @test_inliner(
; GCN-INL1: %c1 = tail call coldcc float @foo(
; GCN-INLDEF: %cmp.i = fcmp ogt float %tmp2, 0.000000e+00
; GCN-MAXBBDEF: %div.i{{[0-9]*}} = fdiv float 1.000000e+00, %c
; GCN-MAXBBDEF: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i
; GCN-MAXBB1: call coldcc void @foo_private_ptr
; GCN-MAXBB1: call coldcc void @foo_private_ptr2
; GCN: call void @foo_noinline(
; GCN: tail call float @_Z3sinf(
define amdgpu_kernel void @test_inliner(ptr addrspace(1) nocapture %a, i32 %n) {
entry:
  %pvt_arr = alloca [64 x float], align 4, addrspace(5)
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i32 %tid
  %tmp2 = load float, ptr addrspace(1) %arrayidx, align 4
  %add = add i32 %tid, 1
  %arrayidx2 = getelementptr inbounds float, ptr addrspace(1) %a, i32 %add
  %tmp5 = load float, ptr addrspace(1) %arrayidx2, align 4
  %c1 = tail call coldcc float @foo(float %tmp2, float %tmp5)
  %or = or i32 %tid, %n
  %arrayidx5 = getelementptr inbounds [64 x float], ptr addrspace(5) %pvt_arr, i32 0, i32 %or
  store float %c1, ptr addrspace(5) %arrayidx5, align 4
  %arrayidx7 = getelementptr inbounds [64 x float], ptr addrspace(5) %pvt_arr, i32 0, i32 %or
  call coldcc void @foo_private_ptr(ptr addrspace(5) %arrayidx7)
  %arrayidx8 = getelementptr inbounds [64 x float], ptr addrspace(5) %pvt_arr, i32 0, i32 1
  %arrayidx9 = getelementptr inbounds [64 x float], ptr addrspace(5) %pvt_arr, i32 0, i32 2
  call coldcc void @foo_private_ptr2(ptr addrspace(5) %arrayidx8, ptr addrspace(5) %arrayidx9)
  call void @foo_noinline(ptr addrspace(5) %arrayidx7)
  %and = and i32 %tid, %n
  %arrayidx11 = getelementptr inbounds [64 x float], ptr addrspace(5) %pvt_arr, i32 0, i32 %and
  %tmp12 = load float, ptr addrspace(5) %arrayidx11, align 4
  %c2 = call float @sin_wrapper(float %tmp12)
  store float %c2, ptr addrspace(5) %arrayidx7, align 4
  %xor = xor i32 %tid, %n
  %arrayidx16 = getelementptr inbounds [64 x float], ptr addrspace(5) %pvt_arr, i32 0, i32 %xor
  %tmp16 = load float, ptr addrspace(5) %arrayidx16, align 4
  store float %tmp16, ptr addrspace(1) %arrayidx, align 4
  ret void
}

; Kernel passing pointers into two separate 32-float private arrays to
; @foo_private_ptr2. The callee is expected to be inlined unless
; -amdgpu-inline-max-bb=1 is given.
; GCN: define amdgpu_kernel void @test_inliner_multi_pvt_ptr(
; GCN-MAXBBDEF: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i
; GCN-MAXBB1: call coldcc void @foo_private_ptr2
define amdgpu_kernel void @test_inliner_multi_pvt_ptr(ptr addrspace(1) nocapture %a, i32 %n, float %v) {
entry:
  %pvt_arr1 = alloca [32 x float], align 4, addrspace(5)
  %pvt_arr2 = alloca [32 x float], align 4, addrspace(5)
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i32 %tid
  %or = or i32 %tid, %n
  %arrayidx4 = getelementptr inbounds [32 x float], ptr addrspace(5) %pvt_arr1, i32 0, i32 %or
  %arrayidx5 = getelementptr inbounds [32 x float], ptr addrspace(5) %pvt_arr2, i32 0, i32 %or
  store float %v, ptr addrspace(5) %arrayidx4, align 4
  store float %v, ptr addrspace(5) %arrayidx5, align 4
  %arrayidx8 = getelementptr inbounds [32 x float], ptr addrspace(5) %pvt_arr1, i32 0, i32 1
  %arrayidx9 = getelementptr inbounds [32 x float], ptr addrspace(5) %pvt_arr2, i32 0, i32 2
  call coldcc void @foo_private_ptr2(ptr addrspace(5) %arrayidx8, ptr addrspace(5) %arrayidx9)
  %xor = xor i32 %tid, %n
  %arrayidx15 = getelementptr inbounds [32 x float], ptr addrspace(5) %pvt_arr1, i32 0, i32 %xor
  %arrayidx16 = getelementptr inbounds [32 x float], ptr addrspace(5) %pvt_arr2, i32 0, i32 %xor
  %tmp15 = load float, ptr addrspace(5) %arrayidx15, align 4
  %tmp16 = load float, ptr addrspace(5) %arrayidx16, align 4
  %tmp17 = fadd float %tmp15, %tmp16
  store float %tmp17, ptr addrspace(1) %arrayidx, align 4
  ret void
}

; Same shape as @test_inliner_multi_pvt_ptr, except that the second private
; array holds 33 floats instead of 32. Here the call is expected to remain
; with -inline-threshold=1 and to be inlined with the default threshold.
; NOTE(review): the extra element presumably pushes the combined private
; array size past a cutoff used by the AMDGPU inlining heuristics - confirm
; against the target's inline cost code.
; GCN: define amdgpu_kernel void @test_inliner_multi_pvt_ptr_cutoff(
; GCN-INL1: call coldcc void @foo_private_ptr2
; GCN-INLDEF: %div.i{{[0-9]*}} = fdiv float 2.000000e+00, %tmp1.i
define amdgpu_kernel void @test_inliner_multi_pvt_ptr_cutoff(ptr addrspace(1) nocapture %a, i32 %n, float %v) {
entry:
  %pvt_arr1 = alloca [32 x float], align 4, addrspace(5)
  %pvt_arr2 = alloca [33 x float], align 4, addrspace(5)
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i32 %tid
  %or = or i32 %tid, %n
  %arrayidx4 = getelementptr inbounds [32 x float], ptr addrspace(5) %pvt_arr1, i32 0, i32 %or
  %arrayidx5 = getelementptr inbounds [33 x float], ptr addrspace(5) %pvt_arr2, i32 0, i32 %or
  store float %v, ptr addrspace(5) %arrayidx4, align 4
  store float %v, ptr addrspace(5) %arrayidx5, align 4
  %arrayidx8 = getelementptr inbounds [32 x float], ptr addrspace(5) %pvt_arr1, i32 0, i32 1
  %arrayidx9 = getelementptr inbounds [33 x float], ptr addrspace(5) %pvt_arr2, i32 0, i32 2
  call coldcc void @foo_private_ptr2(ptr addrspace(5) %arrayidx8, ptr addrspace(5) %arrayidx9)
  %xor = xor i32 %tid, %n
  %arrayidx15 = getelementptr inbounds [32 x float], ptr addrspace(5) %pvt_arr1, i32 0, i32 %xor
  %arrayidx16 = getelementptr inbounds [33 x float], ptr addrspace(5) %pvt_arr2, i32 0, i32 %xor
  %tmp15 = load float, ptr addrspace(5) %arrayidx15, align 4
  %tmp16 = load float, ptr addrspace(5) %arrayidx16, align 4
  %tmp17 = fadd float %tmp15, %tmp16
  store float %tmp17, ptr addrspace(1) %arrayidx, align 4
  ret void
}

; Kernel with three basic blocks calling the single-block @sin_wrapper.
; Every run line, including the one with -amdgpu-inline-max-bb=1, expects
; the wrapper to be inlined so that a direct call to @_Z3sinf is left.
; GCN: define amdgpu_kernel void @test_inliner_maxbb_singlebb(
; GCN: tail call float @_Z3sinf
define amdgpu_kernel void @test_inliner_maxbb_singlebb(ptr addrspace(1) nocapture %a, i32 %n) {
entry:
  %cmp = icmp eq i32 %n, 1
  br i1 %cmp, label %bb.1, label %bb.2

bb.1:
  store float 1.0, ptr undef
  br label %bb.2

bb.2:
  %c = call float @sin_wrapper(float 1.0)
  store float %c, ptr addrspace(1) %a
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @_Z3sinf(float) #1
declare void @forbid_sroa(ptr addrspace(5) nocapture %p)

attributes #0 = { noinline }
attributes #1 = { nounwind readnone }