xref: /llvm-project/llvm/test/Transforms/Inline/AMDGPU/amdgpu-inline-alloca-argument-cost.ll (revision dd1df099ae37c2a68a20c96615b7ed7474eacec4)
1; RUN: opt -mtriple=amdgcn--amdhsa -S -passes=inline -inline-threshold=0 -debug-only=inline-cost %s 2>&1 | FileCheck %s
2
3; REQUIRES: asserts
4
5target datalayout = "A5"
6
; Verify that the cost set by -amdgpu-inline-arg-alloca-cost is properly added to the threshold.
8
; Callee that never passes %p on to another function: it addresses element
; %idx of the [64 x float] view of %p, loads that float and stores the same
; value back to the same address.
define void @local_access_only(ptr addrspace(5) %p, i32 %idx) {
  %elem.ptr = getelementptr inbounds [64 x float], ptr addrspace(5) %p, i32 0, i32 %idx
  %elem = load float, ptr addrspace(5) %elem.ptr
  store float %elem, ptr addrspace(5) %elem.ptr, align 4
  ret void
}
15
; Below the cutoff, the alloca cost is 0, and only the cost of the instructions saved by SROA is counted.
17; CHECK: Analyzing call of local_access_only... (caller:test_inliner_sroa_single_below_cutoff)
18; CHECK: NumAllocaArgs: 1
19; CHECK: SROACostSavings: 10
20; CHECK: SROACostSavingsLost: 0
21; CHECK: Threshold: 66000
; Kernel with a single addrspace(5) array of 64 floats. The array is the
; pointer argument of the call to @local_access_only, with constant index 4.
; The kernel arguments %a and %n are not used in the body.
define amdgpu_kernel void @test_inliner_sroa_single_below_cutoff(ptr addrspace(1) %a, i32 %n) {
bb:
  %scratch = alloca [64 x float], align 4, addrspace(5)
  call void @local_access_only(ptr addrspace(5) %scratch, i32 4)
  ret void
}
28
29; Above the cutoff, attribute a cost to the alloca
30; CHECK: Analyzing call of local_access_only... (caller:test_inliner_sroa_single_above_cutoff)
31; CHECK: NumAllocaArgs: 1
32; CHECK: SROACostSavings: 66010
33; CHECK: SROACostSavingsLost: 0
34; CHECK: Threshold: 66000
; Same shape as the 64-float kernel, but the addrspace(5) array holds 65
; floats. It is passed to @local_access_only with constant index 4.
; The kernel arguments %a and %n are not used in the body.
define amdgpu_kernel void @test_inliner_sroa_single_above_cutoff(ptr addrspace(1) %a, i32 %n) {
bb:
  %scratch = alloca [65 x float], align 4, addrspace(5)
  call void @local_access_only(ptr addrspace(5) %scratch, i32 4)
  ret void
}
41
; Callee taking two addrspace(5) pointers. %p1 is handed to @external, while
; %p2 is only accessed locally: element 7 of its [64 x float] view is loaded
; and the same value is stored back.
define void @use_first_externally(ptr addrspace(5) %p1, ptr addrspace(5) %p2) {
  call void @external(ptr addrspace(5) %p1)
  %slot = getelementptr inbounds [64 x float], ptr addrspace(5) %p2, i32 0, i32 7
  %elem = load float, ptr addrspace(5) %slot
  store float %elem, ptr addrspace(5) %slot, align 4
  ret void
}
49
; Callee taking two addrspace(5) pointers and passing each of them, in order,
; to @external. Neither pointer is loaded from or stored to here.
define void @use_both_externally(ptr addrspace(5) %p1, ptr addrspace(5) %p2) {
bb:
  call void @external(ptr addrspace(5) %p1)
  call void @external(ptr addrspace(5) %p2)
  ret void
}
55
; One of the two arrays (the one the callee passes to @external) cannot be handled by SROA.
57; CHECK: Analyzing call of use_first_externally... (caller:test_inliner_sroa_double)
58; CHECK: NumAllocaArgs: 2
59; CHECK: SROACostSavings: 32502
60; CHECK: SROACostSavingsLost: 33507
61; CHECK: Threshold: 66000
; Kernel with two addrspace(5) arrays. The 33-float array is the first
; argument of @use_first_externally (the one that callee forwards to
; @external); the 32-float array is the second argument (the one that callee
; only loads from and stores to).
define amdgpu_kernel void @test_inliner_sroa_double() {
bb:
  %arr.first = alloca [33 x float], align 4, addrspace(5)
  %arr.second = alloca [32 x float], align 4, addrspace(5)
  call void @use_first_externally(ptr addrspace(5) %arr.first, ptr addrspace(5) %arr.second)
  ret void
}
69
; Neither of the two arrays can be handled by SROA.
71; CHECK: Analyzing call of use_both_externally... (caller:test_inliner_no_sroa)
72; CHECK: NumAllocaArgs: 2
73; CHECK: SROACostSavings: 0
74; CHECK: SROACostSavingsLost: 65999
75; CHECK: Threshold: 66000
; Kernel with two addrspace(5) arrays (33 and 32 floats), both passed to
; @use_both_externally, which forwards each pointer to @external.
define amdgpu_kernel void @test_inliner_no_sroa() {
bb:
  %arr.first = alloca [33 x float], align 4, addrspace(5)
  %arr.second = alloca [32 x float], align 4, addrspace(5)
  call void @use_both_externally(ptr addrspace(5) %arr.first, ptr addrspace(5) %arr.second)
  ret void
}
83
84; No private arrays
85; CHECK: Analyzing call of use_both_externally... (caller:test_inliner_no_alloc)
86; CHECK: NumAllocaArgs: 0
87; CHECK: SROACostSavings: 0
88; CHECK: SROACostSavingsLost: 0
89; CHECK: Threshold: 0
; Kernel that creates no alloca of its own: the two addrspace(5) pointers it
; receives as kernel arguments are passed straight to @use_both_externally.
define amdgpu_kernel void @test_inliner_no_alloc(ptr addrspace(5) %a, ptr addrspace(5) %b) {
bb:
  call void @use_both_externally(ptr addrspace(5) %a, ptr addrspace(5) %b)
  ret void
}
95
; Function declared without a body in this module. @use_first_externally and
; @use_both_externally pass their addrspace(5) pointer arguments to it.
declare void @external(ptr addrspace(5))
97