; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -passes=loop-unroll -unroll-threshold=100 -unroll-peel-count=0 -unroll-allow-partial=false -unroll-max-iteration-count-to-analyze=16 < %s | FileCheck %s

; Each kernel below contains the same 16-iteration loop (load, call, fmul,
; store); only the kind of call in the loop body differs. The FileCheck
; expectations record whether a conditional branch survives the loop-unroll
; pass, i.e. whether the loop was fully unrolled (partial unrolling and
; peeling are disabled on the command line above).

; Loop body calls the llvm.minnum.f32 intrinsic.
; Expectation: no conditional branch is left in the function.
; CHECK-LABEL: @test_intrinsic_call_cost(
; CHECK-NOT: br i1
define amdgpu_kernel void @test_intrinsic_call_cost(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture %in) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %sum.02 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
  %arrayidx.in = getelementptr inbounds float, ptr addrspace(1) %in, i32 %indvars.iv
  %arrayidx.out = getelementptr inbounds float, ptr addrspace(1) %out, i32 %indvars.iv
  %load = load float, ptr addrspace(1) %arrayidx.in
  %call = call float @llvm.minnum.f32(float %load, float 1.0)
  %fmul = fmul float %call, %sum.02
  store float %fmul, ptr addrspace(1) %arrayidx.out
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 16
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; Loop body makes a direct call to the declared function @func.
; Expectation: the loop's exit branch is still present.
; CHECK-LABEL: @test_func_call_cost(
; CHECK: br i1 %exitcond
define amdgpu_kernel void @test_func_call_cost(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture %in) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %sum.02 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
  %arrayidx.in = getelementptr inbounds float, ptr addrspace(1) %in, i32 %indvars.iv
  %arrayidx.out = getelementptr inbounds float, ptr addrspace(1) %out, i32 %indvars.iv
  %load = load float, ptr addrspace(1) %arrayidx.in
  %call = call float @func(float %load, float 1.0)
  %fmul = fmul float %call, %sum.02
  store float %fmul, ptr addrspace(1) %arrayidx.out
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 16
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

; Loop body makes an indirect call through a function pointer that is loaded
; from address space 4 on every iteration.
; Expectation: the loop's exit branch is still present.
; CHECK-LABEL: @test_indirect_call_cost(
; CHECK: br i1 %exitcond
define amdgpu_kernel void @test_indirect_call_cost(ptr addrspace(1) noalias nocapture %out, ptr addrspace(1) noalias nocapture %in) #0 {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %sum.02 = phi float [ %fmul, %for.body ], [ 0.0, %entry ]
  %arrayidx.in = getelementptr inbounds float, ptr addrspace(1) %in, i32 %indvars.iv
  %arrayidx.out = getelementptr inbounds float, ptr addrspace(1) %out, i32 %indvars.iv
  %load = load float, ptr addrspace(1) %arrayidx.in
  %fptr = load ptr, ptr addrspace(4) null
  %call = tail call float %fptr(float %load, float 1.0)
  %fmul = fmul float %call, %sum.02
  store float %fmul, ptr addrspace(1) %arrayidx.out
  %indvars.iv.next = add i32 %indvars.iv, 1
  %exitcond = icmp eq i32 %indvars.iv.next, 16
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ret void
}

declare float @llvm.minnum.f32(float, float) #1
declare float @func(float, float) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }